You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/27 00:10:39 UTC
svn commit: r1590314 -
/ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java
Author: tmill
Date: Sat Apr 26 22:10:39 2014
New Revision: 1590314
URL: http://svn.apache.org/r1590314
Log:
CTAKES-297: First pass at some default pipelines. Full dictionary pipeline still WIP but basics are set up.
Added:
ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java
Added: ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java?rev=1590314&view=auto
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java (added)
+++ ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java Sat Apr 26 22:10:39 2014
@@ -0,0 +1,110 @@
+package org.apache.ctakes.clinicalpipeline;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.chunker.ae.Chunker;
+import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
+import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
+import org.apache.ctakes.core.ae.SentenceDetector;
+import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
+import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.lvg.ae.LvgAnnotator;
+import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+import org.xml.sax.SAXException;
+
+public class ClinicalPipelineFactory {
+
+ /**
+  * Assembles the default clinical pipeline: the token processing aggregate followed by
+  * {@link CopyNPChunksToLookupWindowAnnotations} and {@link RemoveEnclosedLookupWindows}.
+  * <p>
+  * Work in progress: the dictionary lookup step is not wired in yet, so once the partial
+  * aggregate has been assembled this method throws instead of returning a description.
+  *
+  * @return nothing at present; the method never completes normally
+  * @throws ResourceInitializationException if a component description cannot be created
+  * @throws UnsupportedOperationException always, after the partial pipeline has been built
+  */
+ public static AnalysisEngineDescription getDefaultPipeline() throws ResourceInitializationException {
+   AggregateBuilder pipeline = new AggregateBuilder();
+
+   AnalysisEngineDescription tokenStages = getTokenProcessingPipeline();
+   pipeline.add(tokenStages);
+
+   AnalysisEngineDescription npToWindows =
+       AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class);
+   pipeline.add(npToWindows);
+
+   AnalysisEngineDescription dropEnclosed =
+       AnalysisEngineFactory.createPrimitiveDescription(RemoveEnclosedLookupWindows.class);
+   pipeline.add(dropEnclosed);
+
+   // Still missing: pipeline.add(DictionaryLookupAnnotator.createAnnotatorDescription());
+
+   throw new UnsupportedOperationException("Not yet implemented!");
+
+   // Once the dictionary step exists: return pipeline.createAggregateDescription();
+ }
+
+ /**
+  * Placeholder for the parsing pipeline.
+  * <p>
+  * TODO: not implemented yet; no components are assembled here.
+  *
+  * @return nothing at present; the method never completes normally
+  * @throws UnsupportedOperationException always
+  */
+ public static AnalysisEngineDescription getParsingPipeline(){
+   // No builder is created here: nothing would use it before the throw.
+   throw new UnsupportedOperationException("Not yet implemented!");
+ }
+
+ /**
+  * Builds the aggregate that covers token-level processing. Components are added in this
+  * order: segment annotator, sentence detector, PTB tokenizer, LVG annotator,
+  * context-dependent tokenizer, POS tagger, chunker, and the standard chunk adjusters.
+  *
+  * @return the aggregate description for the components listed above
+  * @throws ResourceInitializationException if a component or the aggregate cannot be described
+  */
+ public static AnalysisEngineDescription getTokenProcessingPipeline() throws ResourceInitializationException {
+   AggregateBuilder tokenPipeline = new AggregateBuilder();
+
+   // Document structure: segments, then sentences.
+   tokenPipeline.add(SimpleSegmentAnnotator.createAnnotatorDescription());
+   tokenPipeline.add(SentenceDetector.createAnnotatorDescription());
+
+   // Tokens and their derived annotations.
+   tokenPipeline.add(TokenizerAnnotatorPTB.createAnnotatorDescription());
+   tokenPipeline.add(LvgAnnotator.createAnnotatorDescription());
+   tokenPipeline.add(ContextDependentTokenizerAnnotator.createAnnotatorDescription());
+   tokenPipeline.add(POSTagger.createAnnotatorDescription());
+
+   // Chunks, followed by the chunk adjustment rules.
+   tokenPipeline.add(Chunker.createAnnotatorDescription());
+   tokenPipeline.add(getStandardChunkAdjusterAnnotator());
+
+   return tokenPipeline.createAggregateDescription();
+ }
+
+ /**
+  * Builds an aggregate of two {@link ChunkAdjuster} configurations: one for the chunk
+  * pattern NP NP and one for the chunk pattern NP PP NP.
+  *
+  * @return the aggregate description holding both adjusters, NP NP first
+  * @throws ResourceInitializationException if an adjuster or the aggregate cannot be described
+  */
+ public static AnalysisEngineDescription getStandardChunkAdjusterAnnotator() throws ResourceInitializationException {
+   // NP NP: adjust the NP so that it spans both chunks.
+   final String[] nounPhrasePair = { "NP", "NP" };
+   // NP PP NP: adjust the NP so that it spans all three chunks.
+   final String[] nounPrepNoun = { "NP", "PP", "NP" };
+
+   AggregateBuilder adjusters = new AggregateBuilder();
+   adjusters.add(ChunkAdjuster.createAnnotatorDescription(nounPhrasePair, 1));
+   adjusters.add(ChunkAdjuster.createAnnotatorDescription(nounPrepNoun, 2));
+   return adjusters.createAggregateDescription();
+ }
+
+ /**
+  * Annotator that adds a {@link LookupWindowAnnotation} covering the span of every
+  * {@link Chunk} whose chunk type is "NP". The chunks themselves are left untouched.
+  */
+ public static class CopyNPChunksToLookupWindowAnnotations extends JCasAnnotator_ImplBase {
+
+   /**
+    * Creates one lookup window per NP chunk found in the given CAS.
+    *
+    * @param jCas the CAS to read chunks from and add lookup windows to
+    * @throws AnalysisEngineProcessException declared by the annotator contract
+    */
+   @Override
+   public void process(JCas jCas) throws AnalysisEngineProcessException {
+     // Snapshot the chunks first so that adding annotations to the indexes below
+     // cannot interfere with the iteration.
+     List<Chunk> chunks = new ArrayList<>(JCasUtil.select(jCas, Chunk.class));
+     for (Chunk chunk : chunks) {
+       // Constant-first comparison: a chunk with no type set is skipped instead of
+       // triggering a NullPointerException.
+       if ("NP".equals(chunk.getChunkType())) {
+         new LookupWindowAnnotation(jCas, chunk.getBegin(), chunk.getEnd()).addToIndexes();
+       }
+     }
+   }
+ }
+
+ public static class RemoveEnclosedLookupWindows extends JCasAnnotator_ImplBase {
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ List<LookupWindowAnnotation> lws = new ArrayList<>(JCasUtil.select(jCas, LookupWindowAnnotation.class));
+ // we'll navigate backwards so that as we delete things we shorten the list from the back
+ for(int i = lws.size()-2; i >= 0; i--){
+ LookupWindowAnnotation lw1 = lws.get(i);
+ LookupWindowAnnotation lw2 = lws.get(i+1);
+ if(lw1.getBegin() <= lw2.getBegin() && lw1.getEnd() >= lw2.getEnd()){
+ /// lw1 envelops or encloses lw2
+ lws.remove(i+1);
+ lw2.removeFromIndexes();
+ }
+ }
+
+ }
+
+ }
+
+ /**
+  * Writes the XML form of the default pipeline description to
+  * {@code desc/DefaultPipeline.xml}.
+  *
+  * @param args ignored
+  * @throws FileNotFoundException if the output file cannot be created or opened
+  * @throws SAXException if serializing the description to XML fails
+  * @throws IOException if writing the description fails
+  * @throws ResourceInitializationException if the pipeline description cannot be built
+  */
+ public static void main(String[] args) throws FileNotFoundException, SAXException, IOException, ResourceInitializationException{
+   AnalysisEngineDescription aed = getDefaultPipeline();
+   // try-with-resources flushes and closes the writer; without it buffered output may
+   // never reach the file.
+   try (PrintWriter out = new PrintWriter("desc/DefaultPipeline.xml")) {
+     aed.toXML(out);
+   }
+
+   // TODO And so on for other aggregates...
+ }
+}