You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2013/05/30 19:26:52 UTC
svn commit: r1487943 - in /ctakes/sandbox/ctakes-sectionizer: ./ .settings/
src/ src/main/ src/main/java/ src/main/java/org/ src/main/java/org/apache/
src/main/java/org/apache/ctakes/ src/main/java/org/apache/ctakes/core/
src/main/java/org/apache/ctake...
Author: chenpei
Date: Thu May 30 17:26:51 2013
New Revision: 1487943
URL: http://svn.apache.org/r1487943
Log:
CTAKES-200 - Add a sectionizer that will normalize to HL7/CCDA standards.
Added:
ctakes/sandbox/ctakes-sectionizer/
ctakes/sandbox/ctakes-sectionizer/.classpath (with props)
ctakes/sandbox/ctakes-sectionizer/.project (with props)
ctakes/sandbox/ctakes-sectionizer/.settings/
ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs
ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs
ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs
ctakes/sandbox/ctakes-sectionizer/pom.xml (with props)
ctakes/sandbox/ctakes-sectionizer/src/
ctakes/sandbox/ctakes-sectionizer/src/main/
ctakes/sandbox/ctakes-sectionizer/src/main/java/
ctakes/sandbox/ctakes-sectionizer/src/main/java/org/
ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/
ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/
ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/
ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/
ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java (with props)
ctakes/sandbox/ctakes-sectionizer/src/main/resources/
ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/
ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/
ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/
ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/
ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/
ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt (with props)
ctakes/sandbox/ctakes-sectionizer/src/test/
ctakes/sandbox/ctakes-sectionizer/src/test/java/
ctakes/sandbox/ctakes-sectionizer/src/test/java/org/
ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/
ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/
ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/
ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/
ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java (with props)
ctakes/sandbox/ctakes-sectionizer/src/test/resources/
Added: ctakes/sandbox/ctakes-sectionizer/.classpath
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.classpath?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.classpath (added)
+++ ctakes/sandbox/ctakes-sectionizer/.classpath Thu May 30 17:26:51 2013
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src/main/java">
+ <attributes>
+ <attribute name="optional" value="true"/>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="src" output="target/test-classes" path="src/test/java">
+ <attributes>
+ <attribute name="optional" value="true"/>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
Propchange: ctakes/sandbox/ctakes-sectionizer/.classpath
------------------------------------------------------------------------------
svn:eol-style = native
Added: ctakes/sandbox/ctakes-sectionizer/.project
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.project?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.project (added)
+++ ctakes/sandbox/ctakes-sectionizer/.project Thu May 30 17:26:51 2013
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>ctakes-sectionizer</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.m2e.core.maven2Builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ <nature>org.eclipse.m2e.core.maven2Nature</nature>
+ </natures>
+</projectDescription>
Propchange: ctakes/sandbox/ctakes-sectionizer/.project
------------------------------------------------------------------------------
svn:eol-style = native
Added: ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs (added)
+++ ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs Thu May 30 17:26:51 2013
@@ -0,0 +1,6 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/main/resources=UTF-8
+encoding//src/test/java=UTF-8
+encoding//src/test/resources=UTF-8
+encoding/<project>=UTF-8
Added: ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs (added)
+++ ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs Thu May 30 17:26:51 2013
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.6
Added: ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs (added)
+++ ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs Thu May 30 17:26:51 2013
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
Added: ctakes/sandbox/ctakes-sectionizer/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/pom.xml?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/pom.xml (added)
+++ ctakes/sandbox/ctakes-sectionizer/pom.xml Thu May 30 17:26:51 2013
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes</artifactId>
+ <version>3.1.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>ctakes-sectionizer</artifactId>
+ <name>ctakes-sectionizer</name>
+ <description>ctakes-sectionizer</description>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.cleartk</groupId>
+ <artifactId>cleartk-util</artifactId>
+ </dependency>
+ </dependencies>
+</project>
\ No newline at end of file
Propchange: ctakes/sandbox/ctakes-sectionizer/pom.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java (added)
+++ ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java Thu May 30 17:26:51 2013
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.core.ae;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.descriptor.ConfigurationParameter;
+
+/**
+ * Creates {@link Segment} annotations for section headings found in the
+ * document text.
+ * <p>
+ * The headings are read from a comma-delimited mapping file (by default
+ * ccda_sections.txt on the classpath), which is based on HL7/CCDA/LOINC
+ * standard headings. Additional custom heading names can be added to the file.
+ */
+public class CDASegmentAnnotator extends JCasAnnotator_ImplBase {
+
+    Logger logger = Logger.getLogger(this.getClass());
+    // Section id -> compiled heading regex. The map is static, so every
+    // instance of this annotator adds to and reads from the same table.
+    protected static HashMap<String, Pattern> patterns = new HashMap<String, Pattern>();
+    protected static final String DEFAULT_SECTION_FILE_NAME = "org/apache/ctakes/core/sections/ccda_sections.txt";
+    // Field separator assumes the sections mapping file is comma delimited
+    // unlikely to change.
+    public static final String PARAM_FIELD_SEPERATOR = ",";
+    // Lines whose first non-blank text is this marker are treated as comments.
+    public static final String PARAM_COMMENT = "#";
+    public static final String PARAM_SECTIONS_FILE = "sections_file";
+
+    // NOTE(review): this field is only assigned if something processes the
+    // @ConfigurationParameter annotation for this instance; nothing in this
+    // class does so itself. When it is null the bundled default file is used.
+    @ConfigurationParameter(name = PARAM_SECTIONS_FILE, description = "Path to File that contains the section header mappings")
+    protected URI sections_path;
+
+    /**
+     * Loads the sections mapping file and precompiles one regex per section id
+     * into {@link #patterns}.
+     * <p>
+     * Blank lines and comment lines are ignored. A row is skipped (and logged)
+     * when its id column is empty, when it ends with the field separator, or
+     * when it contains no non-blank heading name.
+     *
+     * @param aContext
+     *            the UIMA context supplying the configuration parameters
+     * @throws ResourceInitializationException
+     *             if the mapping file cannot be located, read or compiled
+     */
+    public void initialize(UimaContext aContext)
+            throws ResourceInitializationException {
+        super.initialize(aContext);
+        String sectionFile = null;
+        URL sectionURL = null;
+        BufferedReader br = null;
+        try {
+            // Only used for diagnostics below; the file that is actually read
+            // is decided by resolveSectionsUrl().
+            sectionFile = (String) aContext
+                    .getConfigParameterValue(PARAM_SECTIONS_FILE);
+            sectionURL = resolveSectionsUrl();
+
+            // NOTE(review): decodes with the platform default charset.
+            br = new BufferedReader(new InputStreamReader(
+                    sectionURL.openStream()));
+
+            // Read in the Section Mappings File
+            // And load the RegEx Patterns into a Map
+            logger.info("Reading Section File " + sectionURL);
+            String line = null;
+            while ((line = br.readLine()) != null) {
+                String trimmed = line.trim();
+                if (trimmed.length() == 0 || trimmed.startsWith(PARAM_COMMENT)) {
+                    // Nothing to load from blank or comment lines.
+                    continue;
+                }
+                String[] l = line.split(PARAM_FIELD_SEPERATOR);
+                // First column is the HL7 section template id
+                boolean wellFormed = l != null && l.length > 0 && l[0] != null
+                        && l[0].length() > 0
+                        && !line.endsWith(PARAM_FIELD_SEPERATOR);
+                // Make a giant alternator (|) regex group for each HL7 id
+                Pattern p = wellFormed ? buildPattern(l) : null;
+                if (p != null) {
+                    patterns.put(l[0].trim(), p);
+                } else {
+                    logger.info("Warning: Skipped reading sections config row: "
+                            + Arrays.toString(l));
+                }
+            }
+        } catch (Exception e) {
+            // Report the location that was actually being read when it is
+            // known; fall back to the raw parameter value otherwise.
+            logger.error("Error reading Sections file:"
+                    + (sectionURL != null ? sectionURL.toString() : sectionFile));
+            throw new ResourceInitializationException(e);
+        } finally {
+            closeQuietly(br);
+        }
+    }
+
+    /**
+     * Picks the mapping file location: the configured {@link #sections_path}
+     * when set, otherwise the bundled default looked up on the classpath.
+     *
+     * @return the URL to read the section mappings from, never null
+     * @throws Exception
+     *             if the configured URI cannot be converted to a URL or the
+     *             default resource is not on the classpath
+     */
+    private URL resolveSectionsUrl() throws Exception {
+        if (this.sections_path != null) {
+            return this.sections_path.toURL();
+        }
+        URL bundled = this.getClass().getClassLoader()
+                .getResource(DEFAULT_SECTION_FILE_NAME);
+        if (bundled == null) {
+            // Fail with a message that names the missing resource instead of
+            // a bare NullPointerException.
+            throw new IllegalStateException(
+                    "Default sections file not found on classpath: "
+                            + DEFAULT_SECTION_FILE_NAME);
+        }
+        return bundled;
+    }
+
+    /**
+     * Closes the reader (and the stream underneath it), logging rather than
+     * propagating a failure so it cannot mask an earlier exception.
+     */
+    private void closeQuietly(BufferedReader reader) {
+        if (reader == null) {
+            return;
+        }
+        try {
+            reader.close();
+        } catch (java.io.IOException e) {
+            logger.warn("Could not close Sections file reader", e);
+        }
+    }
+
+    /**
+     * Build a case-insensitive regex alternation from one mapping row. Used
+     * only during init time.
+     * <p>
+     * Every column after the first is trimmed and used verbatim as a regex
+     * alternative; blank columns are dropped because an empty alternative
+     * would match at every position of the text.
+     * <p>
+     * NOTE(review): the bundled mapping file describes the second column as
+     * the LOINC section code; it is matched like any heading name - confirm
+     * that this is intended.
+     *
+     * @param line
+     *            the split columns of one row; index 0 is the section id
+     * @return the compiled pattern, or null when the row has no non-blank
+     *         column after the id
+     */
+    private static Pattern buildPattern(String[] line) {
+        StringBuilder alternatives = new StringBuilder();
+        for (int i = 1; i < line.length; i++) {
+            String name = line[i].trim();
+            if (name.length() == 0) {
+                continue;
+            }
+            if (alternatives.length() > 0) {
+                alternatives.append("|");
+            }
+            alternatives.append(name);
+        }
+        if (alternatives.length() == 0) {
+            return null;
+        }
+        return Pattern.compile("(" + alternatives + ")",
+                Pattern.CASE_INSENSITIVE);
+    }
+
+    /**
+     * Adds one {@link Segment} to the indexes for every match of every loaded
+     * heading pattern in the document text. The segment id is the section id
+     * of the pattern that matched. A document without text is only logged.
+     *
+     * @param jCas
+     *            the CAS holding the document to annotate
+     * @throws AnalysisEngineProcessException
+     *             declared by the UIMA contract; not thrown directly here
+     */
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+        String text = jCas.getDocumentText();
+        if (text == null) {
+            String docId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+            logger.info("text is null for docId=" + docId, null);
+            return;
+        }
+        for (java.util.Map.Entry<String, Pattern> entry : patterns.entrySet()) {
+            Matcher m = entry.getValue().matcher(text);
+            while (m.find()) {
+                Segment segment = new Segment(jCas);
+                segment.setBegin(m.start());
+                // TODO: Need to figure out the end of the section
+                // For now the segment only spans the matched heading text.
+                segment.setEnd(m.end());
+                segment.setId(entry.getKey());
+                segment.addToIndexes();
+            }
+        }
+    }
+}
Propchange: ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt (added)
+++ ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt Thu May 30 17:26:51 2013
@@ -0,0 +1,34 @@
+# This file is used by ctakes-core/sectionizer
+# It uses rules and RegEx to match the section headers
+# It is derived from the Consolidated CDA/HL7 standard
+# http://bluebuttonplus.org/healthrecords.html
+# http://cdatools.org/infocenter/index.jsp
+# The format is as follows:
+# HL7 template id, LOINC Section Code, n list of header names
+# Custom ones can be added to the below mapping file
+# By default, header names are matched case-insensitively and surrounding spaces are trimmed.
+
+2.16.840.1.113883.10.20.22.1.1, 34133-9, Header, Patient information and demographics
+2.16.840.1.113883.10.20.22.2.6.1, 48765-2, Allergies, Adverse Reactions, Alerts
+2.16.840.1.113883.10.20.22.2.22.1, 46240-8, History of encounters, Encounters,Surgeries, ED visits
+2.16.840.1.113883.10.20.22.2.2.1, 11369-6, History of immunizations, Immunizations,Immunizations and vaccines
+2.16.840.1.113883.10.20.22.2.1.1, 10160-0, HISTORY OF MEDICATION USE, Medications
+2.16.840.1.113883.10.20.22.2.10, 18776-5, Treatment plan, Care Plan
+2.16.840.1.113883.10.20.22.2.11.1, 10183-2, HOSPITAL DISCHARGE MEDICATIONS, Discharge Medications
+1.3.6.1.4.1.19376.1.5.3.1.3.1, 42349-1, Reason for Referral
+2.16.840.1.113883.10.20.22.2.5.1, 11450-4, PROBLEMS, Problem List, Concerns, complaints, observations
+2.16.840.1.113883.10.20.22.2.7.1, 47519-4, Procedures, History of procedures
+2.16.840.1.113883.10.20.22.2.14, 47420-5, FUNCTIONAL STATUS, Functional and Cognitive Status, impairments
+2.16.840.1.113883.10.20.22.2.3.1, 30954-2, Results, laboratory tests, LABORATORY INFORMATION
+2.16.840.1.113883.10.20.22.2.17, 29762-2, Social History, Observations like smoking, drinking
+2.16.840.1.113883.10.20.22.2.4.1, 8716-3, Vital Signs,height, weight, blood pressure
+2.16.840.1.113883.10.20.22.2.41, 8653-8, HOSPITAL DISCHARGE INSTRUCTIONS, Discharge Instructions, Written discharge instructions
+
+2.16.840.1.113883.10.20.22.2.15, 10157-6, Family History
+1.3.6.1.4.1.19376.1.5.3.1.1.13.2.1, 10154-3, CHIEF COMPLAINT
+2.16.840.1.113883.10.20.22.2.37,55109-3, Complications
+2.16.840.1.113883.10.20.22.2.20, 11348-0, HISTORY OF PAST ILLNESS
+1.3.6.1.4.1.19376.1.5.3.1.3.4, 10164-2, HISTORY OF PRESENT ILLNESS
+2.16.840.1.113883.10.20.2.5, 10210-3, GENERAL STATUS
+2.16.840.1.113883.10.20.22.2.24, 11535-2, Hospital Discharge Diagnosis
+2.16.840.1.113883.10.20.22.2.16, 11493-4, Hospital Discharge Studies Summary
\ No newline at end of file
Propchange: ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
------------------------------------------------------------------------------
svn:eol-style = native
Added: ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java (added)
+++ ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java Thu May 30 17:26:51 2013
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.core.ae;
+
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.cleartk.util.cr.FilesCollectionReader;
+import org.junit.Test;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Runs {@link CDASegmentAnnotator} over a sample note and prints the id of
+ * every {@link Segment} it produced. The test makes no assertions; it passes
+ * as long as the pipeline runs without throwing.
+ */
+public class TestCDASegmentAnnotator {
+
+    /** Relative path of the sample plain-text note used as pipeline input. */
+    public static String INPUT_FILE = "../ctakes-regression-test/testdata/input/plaintext/doc2_07543210_sample_current.txt";
+
+    /**
+     * Builds a reader, the section annotator and a printing consumer, then
+     * runs them as a single pipeline.
+     */
+    @Test
+    public void TestCDASegmentPipeLine() throws Exception {
+        final TypeSystemDescription types = TypeSystemDescriptionFactory
+                .createTypeSystemDescription();
+        final CollectionReader noteReader = openInputReader(types);
+        final AnalysisEngine sectionizer = AnalysisEngineFactory
+                .createPrimitive(CDASegmentAnnotator.class, types);
+        final AnalysisEngine segmentPrinter = AnalysisEngineFactory
+                .createPrimitive(DumpOutputAE.class, types);
+
+        SimplePipeline.runPipeline(noteReader, sectionizer, segmentPrinter);
+    }
+
+    /** Creates the collection reader rooted at {@link #INPUT_FILE}. */
+    private static CollectionReader openInputReader(
+            TypeSystemDescription types) throws Exception {
+        return CollectionReaderFactory.createCollectionReader(
+                FilesCollectionReader.class, types,
+                FilesCollectionReader.PARAM_ROOT_FILE, INPUT_FILE);
+    }
+
+    /** Pipeline consumer that writes each segment id to standard output. */
+    public static class DumpOutputAE extends JCasAnnotator_ImplBase {
+        public void process(JCas jCas) throws AnalysisEngineProcessException {
+            for (Segment found : JCasUtil.select(jCas, Segment.class)) {
+                System.out.println(describe(found));
+            }
+        }
+
+        /** Formats one segment as the line that gets printed. */
+        private static String describe(Segment found) {
+            return "Segment:" + found.getId();
+        }
+    }
+}
Propchange: ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
------------------------------------------------------------------------------
svn:eol-style = native