You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/16 18:58:42 UTC
svn commit: r1587978 -
/ctakes/trunk/ctakes-core/src/test/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorTest.java
Author: tmill
Date: Wed Apr 16 16:58:42 2014
New Revision: 1587978
URL: http://svn.apache.org/r1587978
Log:
CTAKES-70: Adds unit test for sentence detector.
Added:
ctakes/trunk/ctakes-core/src/test/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorTest.java
Added: ctakes/trunk/ctakes-core/src/test/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorTest.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/test/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorTest.java?rev=1587978&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/test/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorTest.java (added)
+++ ctakes/trunk/ctakes-core/src/test/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorTest.java Wed Apr 16 16:58:42 2014
@@ -0,0 +1,90 @@
+package org.apache.ctakes.core.ae;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.junit.Test;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.JCasFactory;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+/** Unit test checking how {@link SentenceDetector} output depends on the segmenting setup. */
+public class SentenceDetectorAnnotatorTest {
+
+    /** Fictional clinical note with three headed sections, used as input for both pipelines. */
+    public static final String note = "" +
+            "Medications:\n" +
+            "Hibernol, jamitol, triopenin, sproingo\n\n" +
+            "Physical exam:\n" +
+            "Patient is doing fine but probably taking too many fictional drugs. Cholesterol is acceptable. Heartrate is elevated. \n" +
+            "Instructions:\n" +
+            "Patient should quit smoking and taunting sharks.";
+
+    /** Model resource path passed as SD_MODEL_FILE_PARAM in both pipelines. */
+    private static final String SD_MODEL = "org/apache/ctakes/core/sentdetect/sd-med-model.zip";
+
+    /** Runs the note through both pipelines and checks the segment and sentence counts. */
+    @Test
+    public void testSentenceDetectorInitialization() throws UIMAException, IOException {
+        // Segmenting pipeline: the Medications and Instructions sections are skipped, so
+        // only the 3 sentences of the physical exam section should be annotated.
+        JCas jcas = JCasFactory.createJCas();
+        jcas.setDocumentText(note);
+        SimplePipeline.runPipeline(jcas, getSegmentingPipeline());
+        Collection<Segment> segs = JCasUtil.select(jcas, Segment.class);
+        assertEquals("segments from segmenting pipeline", 3, segs.size());
+
+        Collection<Sentence> sents = JCasUtil.select(jcas, Sentence.class);
+        assertEquals("sentences outside the skipped segments", 3, sents.size());
+
+        // Basic pipeline: a single segment and nothing skipped, so sentences from the
+        // whole note (section headings included) are expected -- 8 in total.
+        jcas = JCasFactory.createJCas();
+        jcas.setDocumentText(note);
+        SimplePipeline.runPipeline(jcas, getBasicPipeline());
+        segs = JCasUtil.select(jcas, Segment.class);
+        assertEquals("segments from basic pipeline", 1, segs.size());
+        sents = JCasUtil.select(jcas, Sentence.class);
+        assertEquals("sentences in the whole note", 8, sents.size());
+    }
+
+    /** Builds CDASegmentAnnotator followed by a SentenceDetector that skips two section ids. */
+    private static AnalysisEngine getSegmentingPipeline() throws ResourceInitializationException {
+        AggregateBuilder aggregateBuilder = new AggregateBuilder();
+        // identify segments
+        aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CDASegmentAnnotator.class));
+        // identify sentences, leaving out the segment ids listed below
+        aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+                SentenceDetector.class,
+                SentenceDetector.SD_MODEL_FILE_PARAM,
+                SD_MODEL,
+                SentenceDetector.PARAM_SEGMENTS_TO_SKIP,
+                new String[]{"2.16.840.1.113883.10.20.22.2.1.1" /*Medications*/, "2.16.840.1.113883.10.20.22.2.45" /*Instructions*/}));
+
+        return aggregateBuilder.createAggregate();
+    }
+
+    /** Builds SimpleSegmentAnnotator followed by a SentenceDetector with no segments skipped. */
+    private static AnalysisEngine getBasicPipeline() throws ResourceInitializationException {
+        AggregateBuilder aggregateBuilder = new AggregateBuilder();
+        // identify segments; use simple segment annotator on non-mayo notes
+        aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
+        // identify sentences
+        aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+                SentenceDetector.class,
+                SentenceDetector.SD_MODEL_FILE_PARAM,
+                SD_MODEL));
+
+        return aggregateBuilder.createAggregate();
+    }
+}