You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/05/07 20:22:17 UTC

svn commit: r1480001 - in /ctakes/trunk/ctakes-clinical-pipeline: data/ data/input/ data/output/ src/main/java/org/apache/ctakes/clinicalpipeline/runtime/

Author: tmill
Date: Tue May  7 18:22:14 2013
New Revision: 1480001

URL: http://svn.apache.org/r1480001
Log:
Checked in some code for running a standard operation (a directory of text files to a directory of CUI files), and an easy-to-instantiate template for doing similar things with other cTAKES types.

Added:
    ctakes/trunk/ctakes-clinical-pipeline/data/
    ctakes/trunk/ctakes-clinical-pipeline/data/input/
    ctakes/trunk/ctakes-clinical-pipeline/data/output/
    ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/
    ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfAnnotationsGenerator.java   (with props)
    ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfCUIsGenerator.java   (with props)

Added: ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfAnnotationsGenerator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfAnnotationsGenerator.java?rev=1480001&view=auto
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfAnnotationsGenerator.java (added)
+++ ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfAnnotationsGenerator.java Tue May  7 18:22:14 2013
@@ -0,0 +1,76 @@
+package org.apache.ctakes.clinicalpipeline.runtime;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.lang.reflect.ParameterizedType;
+import java.util.Collection;
+
+import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.util.JCasUtil;
+
+public abstract class BagOfAnnotationsGenerator<T extends Annotation,K> {
+
+	private String outputDir = null;
+	private CollectionReader reader = null;
+	private AnalysisEngine ae = null;
+	private static final String defaultAEPath = "desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml";
+	private Class<T> classOfT;
+	
+	/**
+	 * @param args
+	 * @throws IOException 
+	 * @throws UIMAException 
+	 */
+	public BagOfAnnotationsGenerator(String inputDir, String outputDir) throws UIMAException, IOException{
+		this(inputDir, outputDir, null);
+	}
+	
+	public BagOfAnnotationsGenerator(String inputDir, String outputDir, String aePath) throws UIMAException, IOException {
+		reader = CollectionReaderFactory.createCollectionReaderFromPath("../ctakes-core/desc/collection_reader/FilesInDirectoryCollectionReader.xml", 
+				FilesInDirectoryCollectionReader.PARAM_INPUTDIR, inputDir);
+		this.ae = AnalysisEngineFactory.createAnalysisEngineFromPath(aePath == null ? defaultAEPath : aePath);
+		this.outputDir = outputDir;
+		this.classOfT = getClassOfT();
+	}
+	
+	public void process() throws UIMAException, IOException{
+		JCasIterable casIter = new JCasIterable(reader, ae);
+		while(casIter.hasNext()){
+			JCas jcas = casIter.next();
+			String docId = DocumentIDAnnotationUtil.getDocumentID(jcas);
+			
+			// extract info from cas
+			processCas(jcas, outputDir + File.separator + docId);
+		}
+		ae.destroy();
+	}
+
+	private void processCas(JCas jcas, String outputFilename) throws FileNotFoundException {
+		PrintStream out = new PrintStream(outputFilename);
+
+		Collection<T> annotations = JCasUtil.select(jcas, classOfT);
+		for(T annot : annotations){
+			K output = extractInformation(annot);
+			if(output != null) out.println(output);
+		}
+		out.close();
+	}
+	
+	protected abstract K extractInformation(T t);
+	
+	private Class<T> getClassOfT() {
+		ParameterizedType superclass = (ParameterizedType) getClass().getGenericSuperclass();
+		return (Class<T>) superclass.getActualTypeArguments()[0];
+	}
+}

Propchange: ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfAnnotationsGenerator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfCUIsGenerator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfCUIsGenerator.java?rev=1480001&view=auto
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfCUIsGenerator.java (added)
+++ ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfCUIsGenerator.java Tue May  7 18:22:14 2013
@@ -0,0 +1,52 @@
+package org.apache.ctakes.clinicalpipeline.runtime;
+
+import java.io.IOException;
+import java.util.HashSet;
+
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.UIMAException;
+import org.apache.uima.jcas.cas.FSArray;
+
+/**
+ * Generator that prints, for each IdentifiedAnnotation, the distinct CUIs of the UmlsConcept
+ * entries in its ontology concept array. A CUI is prefixed with "-" when the annotation's
+ * polarity equals CONST.NE_POLARITY_NEGATION_PRESENT.
+ */
+public class BagOfCUIsGenerator extends BagOfAnnotationsGenerator<IdentifiedAnnotation, String> {
+
+	/** Input directory used by {@link #main} when no first argument is given. */
+	private static final String DEFAULT_INPUT_DIR = "data/input";
+	/** Output directory used by {@link #main} when no second argument is given. */
+	private static final String DEFAULT_OUTPUT_DIR = "data/output";
+
+	/**
+	 * Creates a generator that uses the superclass's default analysis engine.
+	 *
+	 * @param inputDir directory containing the files to process
+	 * @param outputDir directory that receives one output file per document
+	 * @throws UIMAException if the superclass constructor fails to set up the pipeline
+	 * @throws IOException if the superclass constructor fails to read a descriptor
+	 */
+	public BagOfCUIsGenerator(String inputDir, String outputDir)
+			throws UIMAException, IOException {
+		super(inputDir, outputDir);
+	}
+
+	/**
+	 * Collects the distinct CUIs of one annotation.
+	 *
+	 * @param t annotation whose ontology concepts are inspected
+	 * @return the CUIs separated by newlines (no trailing newline), each prefixed with "-" for a
+	 *         negated annotation; null when the annotation has no ontology concept array or
+	 *         none of its entries is a UmlsConcept with a non-null CUI
+	 */
+	@Override
+	protected String extractInformation(IdentifiedAnnotation t) {
+		FSArray mentions = t.getOntologyConceptArr();
+		if(mentions == null) return null;
+
+		HashSet<String> uniqueCuis = new HashSet<String>();
+		for(int i = 0; i < mentions.size(); i++){
+			if(mentions.get(i) instanceof UmlsConcept){
+				String cui = ((UmlsConcept) mentions.get(i)).getCui();
+				// A concept without a CUI would otherwise be printed as the literal "null".
+				if(cui != null) uniqueCuis.add(cui);
+			}
+		}
+		if(uniqueCuis.isEmpty()) return null;
+
+		// Polarity belongs to the annotation, not to a concept, so decide the prefix once.
+		String prefix = t.getPolarity() == CONST.NE_POLARITY_NEGATION_PRESENT ? "-" : "";
+
+		StringBuilder buff = new StringBuilder();
+		for(String cui : uniqueCuis){
+			buff.append(prefix);
+			buff.append(cui);
+			buff.append("\n");
+		}
+
+		// Drop the trailing newline; the superclass prints the result with println.
+		return buff.substring(0,buff.length()-1);
+	}
+
+	/**
+	 * Runs the generator from the command line.
+	 *
+	 * @param args optional: args[0] is the input directory (default "data/input") and
+	 *             args[1] is the output directory (default "data/output")
+	 * @throws IOException if a descriptor cannot be read or an output file cannot be written
+	 * @throws UIMAException if the pipeline cannot be created or a document fails
+	 */
+	public static void main(String[] args) throws UIMAException, IOException {
+		String inputDir = args.length > 0 ? args[0] : DEFAULT_INPUT_DIR;
+		String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;
+		(new BagOfCUIsGenerator(inputDir, outputDir)).process();
+	}
+
+}

Propchange: ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/runtime/BagOfCUIsGenerator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain