You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by br...@apache.org on 2013/07/07 21:23:07 UTC
svn commit: r1500511 [4/6] - in /ctakes/sandbox/ctakes-scrubber-deid/src: ./
main/ main/java/ main/java/org/ main/java/org/apache/
main/java/org/apache/uima/ main/java/org/apache/uima/examples/
main/java/org/spin/ main/java/org/spin/scrubber/ main/java...
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/AnnotationPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/AnnotationPrinter.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/AnnotationPrinter.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/AnnotationPrinter.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,574 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.spin.scrubber.uima.consumer;
+
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.collection.base_cpm.CasObjectProcessor;
+import org.apache.uima.examples.SourceDocumentInformation;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceConfigurationException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.ProcessTrace;
+import org.spin.scrubber.uima.type.Calculation;
+import org.spin.scrubber.uima.type.OntologyMatch;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Date;
+import java.util.Iterator;
+
+/**
+ * An example of CAS Consumer. <br>
+ * AnnotationPrinter prints to an output file all annotations in the CAS. <br>
+ * Parameters needed by the AnnotationPrinter are
+ * <ol>
+ * <li> "outputFile" : file to which the output files should be written.</li>
+ * </ol>
+ * <br>
+ * These parameters are set in the initialize method to the values specified in the descriptor file.
+ * <br>
+ * These may also be set by the application by using the setConfigParameterValue methods.
+ *
+ *
+ */
+
+@Deprecated
+public class AnnotationPrinter extends CasConsumer_ImplBase implements CasObjectProcessor
+{
+ File outFile;
+
+ FileWriter fileWriter;
+
+ public AnnotationPrinter() {
+ }
+
+ /**
+ * Initializes this CAS Consumer with the parameters specified in the descriptor.
+ *
+ * @throws ResourceInitializationException
+ * if there is error in initializing the resources
+ */
+ public void initialize() throws ResourceInitializationException {
+
+ // extract configuration parameter settings
+ String oPath = (String) getUimaContext().getConfigParameterValue("outputFile");
+
+ // Output file should be specified in the descriptor
+ if (oPath == null) {
+ throw new ResourceInitializationException(
+ ResourceInitializationException.CONFIG_SETTING_ABSENT, new Object[] { "outputFile" });
+ }
+ // If specified output directory does not exist, try to create it
+ outFile = new File(oPath.trim());
+ if (outFile.getParentFile() != null && !outFile.getParentFile().exists()) {
+ if (!outFile.getParentFile().mkdirs())
+ throw new ResourceInitializationException(
+ ResourceInitializationException.RESOURCE_DATA_NOT_VALID, new Object[] { oPath,
+ "outputFile" });
+ }
+ try {
+ fileWriter = new FileWriter(outFile);
+ } catch (IOException e) {
+ throw new ResourceInitializationException(e);
+ }
+ }
+
+ /**
+ * Processes the CasContainer which was populated by the TextAnalysisEngines. <br>
+ * In this case, the CAS index is iterated over selected annotations and printed out into an
+ * output file
+ *
+ * @param aCAS
+ * CasContainer which has been populated by the TAEs
+ *
+ * @throws ResourceProcessException
+ * if there is an error in processing the Resource
+ *
+ * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(CAS)
+ */
+ public synchronized void processCas(CAS aCAS) throws ResourceProcessException {
+ JCas jcas;
+ try {
+ jcas = aCAS.getJCas();
+ } catch (CASException e) {
+ throw new ResourceProcessException(e);
+ }
+
+
+ boolean titleP = false;
+ String docUri = null;
+ Iterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
+ if (it.hasNext()) {
+ SourceDocumentInformation srcDocInfo = (SourceDocumentInformation) it.next();
+ docUri = srcDocInfo.getUri();
+ }
+
+ try
+ {
+ fileWriter.write("\n\n<++++NEW DOCUMENT++++>\t"+new Date(System.currentTimeMillis())+"\n");
+ if (docUri != null)
+ fileWriter.write("DOCUMENT URI:" + docUri + "\n");
+ fileWriter.write("\n");
+ } catch (IOException e1)
+ {
+ // TODO Auto-generated catch block
+ e1.printStackTrace();
+ }
+
+ this.printNounPhraseAnnotations(fileWriter, jcas);
+ this.printWordTokenAnnotations(fileWriter, jcas);
+ this.printOntologyMatch(fileWriter, jcas);
+ this.printCalculationAnnotations(fileWriter, jcas);
+
+
+//TODO: remove ???
+// // iterate and print annotations
+// Iterator annotationIter = jcas.getAnnotationIndex().iterator();
+// while (annotationIter.hasNext()) {
+// Annotation annot = (Annotation) annotationIter.next();
+// if (titleP == false) {
+// try {
+// fileWriter.write("\n\n<++++NEW DOCUMENT++++>\t"+new Date(System.currentTimeMillis())+"\n");
+// if (docUri != null)
+// fileWriter.write("DOCUMENT URI:" + docUri + "\n");
+// fileWriter.write("\n");
+// } catch (IOException e) {
+// throw new ResourceProcessException(e);
+// }
+// titleP = true;
+// }
+// // get the text that is enclosed within the annotation in the CAS
+// String aText = annot.getCoveredText();
+// aText = aText.replace('\n', ' ');
+// aText = aText.replace('\r', ' ');
+//
+// //if aText is empty, continue;
+// if (aText.trim().length()<1)
+// {
+// continue;
+// }
+//
+// //System.out.println("TYPE: "+annot.getType().getShortName());
+//
+// //get Features... //TODO: there must be a better way to do this...
+// Feature posFeat=null;
+// String posTag=null;
+// try
+// {
+// String featName = (String) getUimaContext().getConfigParameterValue("pos");
+// posFeat = annot.getCAS().getTypeSystem().getFeatureByFullName(featName);
+// if (posFeat!=null)
+// posTag = annot.getFeatureValueAsString(posFeat);
+// }
+// catch(Exception e)
+// {
+// System.out.println("CONTINUABLE ERROR: unable to get pos feature value. may be null. continuing.");
+// //e.printStackTrace();
+// }
+//// //only print if pos = {noun, null}
+//// if( posTag!=null && !posTag.startsWith("nn") && !posTag.startsWith("np") )
+//// {
+//// continue;
+//// }
+//
+// //only print WordToken annots
+// //TODO: fix this, there are probably other Types we want, numbers for example for an address.
+// //this if block is here to prevent the same token from being printed multiple times for each Type, ex. lookupwindow, wordtoken, segment, etc...
+// if (!annot.getType().getShortName().equalsIgnoreCase("WordToken") && !annot.getType().getShortName().equalsIgnoreCase("OntologyMatch"))
+// {
+// continue;
+// }
+//
+//
+// String ontArrTag=null;
+//// try
+//// {
+// //1
+//// NamedEntity ne = new NamedEntity(annot.getCAS().getJCas());
+//// FSArray fsArr = ne.getOntologyConceptArr();
+//// for (int i=0; i<fsArr.size(); i++)
+//// {
+//// FeatureStructure fsStruct = fsArr.get(i);
+//// ontArrTag = fsStruct.toString();
+//// }
+//
+// //2
+//// String ontArrName = (String) getUimaContext().getConfigParameterValue("ontologyConceptArr");
+//// Feature ontArrFeat=null;
+//// FeatureStructure ontArrFS=null;
+//// ontArrFeat = annot.getCAS().getTypeSystem().getFeatureByFullName(ontArrName);
+//// if (ontArrFeat!=null)
+//// {
+////// ontArrTag = annot.getFeatureValueAsString(ontArrFeat);
+//// ontArrFS = annot.getFeatureValue(ontArrFeat);
+//// ontArrTag += ontArrFS.toString();
+//// }
+//
+//// //3 - this gets all the annotations of type NamedEntity from this jcas.
+//// //which means it gives all the cui's for the entire doc on every word.
+//// //what we want is only the cuis for the current annot.
+//// Iterator neIt = annot.getCAS().getJCas().getAnnotationIndex(NamedEntity.type).iterator();
+//// while(neIt.hasNext())
+//// {
+//// NamedEntity ne = (NamedEntity)neIt.next();
+//// if (ne.getBegin()==annot.getBegin()) //only print NE annotations if they match the current annot start position.
+//// {
+//// FSArray fsArr = ne.getOntologyConceptArr();
+//// for (int i=0; i<fsArr.size(); i++)
+//// {
+//// FeatureStructure fsStruct = fsArr.get(i);
+//// ontArrTag += "\n"+fsStruct.toString();
+//// // System.out.println(fsStruct.getCAS().getJCas().get)
+//// System.out.println("ontArrTag: " + ontArrTag);
+//// }
+//// }
+//// }
+//// }
+//// catch(Exception e)
+//// {
+//// System.out.println("CONTINUABLE ERROR: unable to get ontologyConceptArr feature value. may be null. continuing.");
+//// e.printStackTrace();
+//// }
+//
+// try
+// {
+// fileWriter.write("begin: " + annot.getBegin()
+// + "\t end: " + annot.getEnd()
+// + "\t num: " + annot.getType().getNumberOfFeatures()
+// + "\t POS: " + posTag
+// + "\t\t name: " + annot.getType().getShortName()
+// + "\t token: " + aText
+// + "\t ontArr: " + ontArrTag
+//// + "\t name: " + annot.getType().getName()
+// + "\n");
+//// for (Feature f : annot.getType().getFeatures())
+//// {
+//// fileWriter.write("feat: " + f.getName()+ "\n");
+//// }
+//// fileWriter.write("\n\n\n");
+//
+// fileWriter.flush();
+// }
+// catch (IOException e)
+// {
+// throw new ResourceProcessException(e);
+// }
+// }
+ }
+
+ private void printWordTokenAnnotations(FileWriter fileWriter, JCas jcas) throws ResourceProcessException
+ {
+ // iterate and print annotations
+ Iterator annotationIter = jcas.getAnnotationIndex(WordToken.type).iterator();
+ while (annotationIter.hasNext())
+ {
+ WordToken annot = (WordToken) annotationIter.next();
+
+ // get the text that is enclosed within the annotation in the CAS
+ String aText = annot.getCoveredText();
+ aText = aText.replace('\n', ' ');
+ aText = aText.replace('\r', ' ');
+
+ //if aText is empty, continue;
+ if (aText.trim().length()<1)
+ {
+ continue;
+ }
+
+ Feature posFeat=null;
+ String posTag=null;
+ try
+ {
+ String featName = (String) getUimaContext().getConfigParameterValue("pos");
+ posFeat = annot.getCAS().getTypeSystem().getFeatureByFullName(featName);
+ if (posFeat!=null)
+ posTag = annot.getFeatureValueAsString(posFeat);
+ }
+ catch(Exception e)
+ {
+ System.out.println("CONTINUABLE ERROR: unable to get pos feature value. may be null. continuing.");
+ }
+
+ try
+ {
+ fileWriter.write("begin: " + annot.getBegin()
+ + " end: " + annot.getEnd()
+ + "\t code: " + annot.getPartOfSpeech()
+ + "\t cap: " + annot.getCapitalization()
+ + "\t ont: " + "POS"
+ + "\t\t name: " + annot.getType().getShortName()
+ + "\t token: " + aText
+ + "\n");
+
+// for (Feature f : annot.getType().getFeatures())
+// {
+// fileWriter.write("feat: " + f.getName()+ "\n");
+// }
+// fileWriter.write("\n\n\n");
+
+ fileWriter.flush();
+ }
+ catch (IOException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+ }
+
+ private void printNounPhraseAnnotations(FileWriter fileWriter, JCas jcas) throws ResourceProcessException
+ {
+ // iterate and print annotations
+ Iterator annotationIter = jcas.getAnnotationIndex(Chunk.type).iterator();
+ while (annotationIter.hasNext())
+ {
+ Chunk annot = (Chunk) annotationIter.next();
+
+ // get the text that is enclosed within the annotation in the CAS
+ String aText = annot.getCoveredText();
+ aText = aText.replace('\n', ' ');
+ aText = aText.replace('\r', ' ');
+
+ //if aText is empty, continue;
+ if (aText.trim().length()<1)
+ {
+ continue;
+ }
+
+// //only want NP chunks
+// if (!annot.getChunkType().equalsIgnoreCase("NP"))
+// {
+// continue;
+// }
+
+ try
+ {
+ fileWriter.write("begin: " + annot.getBegin()
+ + " end: " + annot.getEnd()
+ + "\t code: " + annot.getChunkType()
+ + "\t ont: " + "Chunk"
+ + "\t\t name: " + annot.getType().getShortName()
+ + "\t token: " + aText
+ + "\n");
+
+ fileWriter.flush();
+ }
+ catch (IOException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+ }
+
+ private void printOntologyMatch(FileWriter fileWriter, JCas jcas) throws ResourceProcessException
+ {
+ // iterate and print annotations
+ Iterator<Annotation> annotationIter = jcas.getAnnotationIndex(OntologyMatch.type).iterator();
+ while (annotationIter.hasNext())
+ {
+ OntologyMatch annot = (OntologyMatch) annotationIter.next();
+
+ // get the text that is enclosed within the annotation in the CAS
+ String aText = annot.getCoveredText();
+ aText = aText.replace('\n', ' ');
+ aText = aText.replace('\r', ' ');
+
+ //if aText is empty, continue;
+ if (aText.trim().length()<1)
+ {
+ continue;
+ }
+
+ try
+ {
+ fileWriter.write("begin: " + annot.getBegin()
+ + " end: " + annot.getEnd()
+// + "\t num: " + annot.getType().getNumberOfFeatures()
+ + "\t code: " + annot.getCode()
+ + "\t ont: " + annot.getOntology()
+ + "\t\t name: " + annot.getType().getShortName()
+ + "\t token: " + aText
+ + "\n");
+// for (Feature f : annot.getType().getFeatures())
+// {
+// fileWriter.write("feat: " + f.getName()+ "\n");
+// }
+// fileWriter.write("\n\n\n");
+
+ fileWriter.flush();
+ }
+ catch (IOException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+ }
+
+ private void printCalculationAnnotations(FileWriter fileWriter, JCas jcas) throws ResourceProcessException
+ {
+ // iterate and print annotations
+ Iterator<Annotation> annotationIter = jcas.getAnnotationIndex(Calculation.type).iterator();
+ while (annotationIter.hasNext())
+ {
+ Calculation annot = (Calculation) annotationIter.next();
+
+ // get the text that is enclosed within the annotation in the CAS
+ String aText = annot.getCoveredText();
+ aText = aText.replace('\n', ' ');
+ aText = aText.replace('\r', ' ');
+
+ //if aText is empty, continue;
+ if (aText.trim().length()<1)
+ {
+ continue;
+ }
+
+ try
+ {
+ fileWriter.write("begin: " + annot.getBegin()
+ + " end: " + annot.getEnd()
+ + "\t value: " + annot.getCalculationValue()
+ + "\t name: " + annot.getCalculationName()
+ + "\t name: " + annot.getType().getShortName()
+ + "\t token: " + aText
+ + "\n");
+
+ fileWriter.flush();
+ }
+ catch (IOException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+ }
+
+
+ /**
+ * Called when a batch of processing is completed.
+ *
+ * @param aTrace
+ * ProcessTrace object that will log events in this method.
+ * @throws ResourceProcessException
+ * if there is an error in processing the Resource
+ * @throws IOException
+ * if there is an IO Error
+ *
+ * @see org.apache.uima.collection.CasConsumer#batchProcessComplete(ProcessTrace)
+ */
+ public void batchProcessComplete(ProcessTrace aTrace) throws ResourceProcessException,
+ IOException {
+ // nothing to do in this case as AnnotationPrinter doesnot do
+ // anything cumulatively
+ }
+
+ /**
+ * Called when the entire collection is completed.
+ *
+ * @param aTrace
+ * ProcessTrace object that will log events in this method.
+ * @throws ResourceProcessException
+ * if there is an error in processing the Resource
+ * @throws IOException
+ * if there is an IO Error
+ * @see org.apache.uima.collection.CasConsumer#collectionProcessComplete(ProcessTrace)
+ */
+ public void collectionProcessComplete(ProcessTrace aTrace) throws ResourceProcessException,
+ IOException {
+ if (fileWriter != null) {
+ fileWriter.close();
+ }
+ }
+
+ /**
+ * Reconfigures the parameters of this Consumer. <br>
+ * This is used in conjunction with the setConfigurationParameterValue to set the configuration
+ * parameter values to values other than the ones specified in the descriptor.
+ *
+ * @throws ResourceConfigurationException
+ * if the configuration parameter settings are invalid
+ *
+ * @see org.apache.uima.resource.ConfigurableResource#reconfigure()
+ */
+ public void reconfigure() throws ResourceConfigurationException {
+ super.reconfigure();
+ // extract configuration parameter settings
+ String oPath = (String) getUimaContext().getConfigParameterValue("outputFile");
+ File oFile = new File(oPath.trim());
+ // if output file has changed, close exiting file and open new
+ if (!oFile.equals(this.outFile)) {
+ this.outFile = oFile;
+ try {
+ fileWriter.close();
+
+ // If specified output directory does not exist, try to create it
+ if (oFile.getParentFile() != null && !oFile.getParentFile().exists()) {
+ if (!oFile.getParentFile().mkdirs())
+ throw new ResourceConfigurationException(
+ ResourceInitializationException.RESOURCE_DATA_NOT_VALID, new Object[] { oPath,
+ "outputFile" });
+ }
+ fileWriter = new FileWriter(oFile);
+ } catch (IOException e) {
+ throw new ResourceConfigurationException();
+ }
+ }
+ }
+
+ /**
+ * Called if clean up is needed in case of exit under error conditions.
+ *
+ * @see org.apache.uima.resource.Resource#destroy()
+ */
+ public void destroy() {
+ if (fileWriter != null) {
+ try {
+ fileWriter.close();
+ } catch (IOException e) {
+ // ignore IOException on destroy
+ }
+ }
+ }
+
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/AnnotationPrinter.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotation.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotation.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotation.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotation.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,346 @@
+package org.spin.scrubber.uima.consumer;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
+import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.spin.scrubber.uima.type.Calculation;
+import org.spin.scrubber.uima.type.OntologyMatch;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+
+/**
+ * One CSV output stream per annotation type. Each enum constant knows its UIMA
+ * type id and the CSV column headers it writes; files are opened lazily on the
+ * first {@link #write(String)} and created under {@link #OUTPUT_DIR}.
+ *
+ * @author Andrew McMurry, MS
+ * Created: 4/4/13
+ */
+public enum CSVAnnotation
+{
+    //UIMA Annotation
+    _Annotation(Annotation.type,
+            Header.annotation_type,
+            Header.filename_short,
+            Header.start_idx,
+            Header.end_idx),
+
+    _Sentence(Sentence.type,
+            Header.annotation_type,
+            Header.filename_short,
+            Header.start_idx,
+            Header.end_idx,
+            Header.sentence_seq),
+
+    _SentenceText(Sentence.type, Header.token),
+
+    //cTakes BaseToken ...*
+    _BaseToken(BaseToken.type),
+    _NewLineToken(NewlineToken.type),
+    _ContractionToken(ContractionToken.type),
+    _PunctuationToken(PunctuationToken.type),
+    _SymbolToken(SymbolToken.type),
+
+    //WordToken
+    _WordToken(WordToken.type,
+            Header.annotation_type,
+            Header.filename_short,
+            Header.start_idx,
+            Header.end_idx,
+            Header.token_seq,
+            Header.pos,
+            Header.cap,
+            Header.token),
+
+    //Ontology Match
+    _OntologyMatch(OntologyMatch.type,
+            Header.annotation_type,
+            Header.filename_short,
+            Header.start_idx,
+            Header.end_idx,
+            Header.match_source,
+            Header.match_value,
+            Header.token),
+
+    //Term Frequency (same columns as ontology match)
+    _Calculation(Calculation.type,
+            _OntologyMatch.headers);
+
+
+    /** CSV column headers; the name of each constant is the literal column title. */
+    public static enum Header
+    {
+        annotation_type,
+        filename_short,
+        start_idx,
+        end_idx,
+
+        sentence_seq,
+        token_seq,
+
+        pos,
+        cap,
+
+        match_source,
+        match_value,
+
+        token
+    }
+
+    // Member Variables
+
+    /** UIMA type id this CSV stream corresponds to. */
+    public final int type;
+    /** Column headers written as the first line of the CSV file. */
+    private final Header[] headers;
+
+    private File file;
+    private Writer writer;
+    /** True once {@link #open()} has succeeded and until {@link #close()}. */
+    private boolean isReady = false;
+
+    CSVAnnotation(int type, Header... headers)
+    {
+        this.type = type;
+        this.headers = headers;
+    }
+
+    //cTakes BaseToken column set is the default
+    CSVAnnotation(int type)
+    {
+        this.type = type;
+        this.headers = new Header[] {
+                Header.annotation_type,
+                Header.filename_short,
+                Header.start_idx,
+                Header.end_idx,
+                Header.token_seq,
+                Header.pos
+        };
+    }
+
+    //File Writer Functions
+
+    /** Creates OUTPUT_DIR (including parents) if needed and opens this entry's CSV file. */
+    private void open() throws IOException
+    {
+        if(!OUTPUT_DIR.exists())
+        {
+            log.debug("Creating OUTPUT_DIR: "+OUTPUT_DIR.getAbsolutePath());
+            // mkdirs (not mkdir) so a nested OUTPUT_DIR path is created in full
+            OUTPUT_DIR.mkdirs();
+        }
+
+        String csv = this.name().replace("_","") + ".csv";
+
+        this.file = new File(OUTPUT_DIR, csv);
+        this.writer = new FileWriter(file); //Default IMPL
+        this.isReady = true;
+
+        log.info("Opened FileWriter "+this.file.getAbsolutePath());
+    }
+
+    /**
+     * Closes this entry's writer. A no-op when the entry was never opened,
+     * so {@link #closeAll()} is safe even if some CSVs were never written.
+     */
+    public void close() throws IOException
+    {
+        if(!isReady) return;
+
+        this.writer.close();
+        this.isReady = false;
+
+        log.debug("Closed FileWriter "+file.getAbsolutePath());
+    }
+
+    /** Closes every entry's writer (unopened entries are skipped). */
+    public static void closeAll() throws IOException
+    {
+        for(CSVAnnotation entry : CSVAnnotation.values())
+        {
+            entry.close();
+        }
+    }
+
+    /** Deletes this entry's CSV file if it was ever created. */
+    public void delete()
+    {
+        if(file != null && file.exists())
+            file.delete();
+    }
+
+    private void flush() throws IOException
+    {
+        if(isReady) writer.flush();
+    }
+
+    /** Flushes every open entry's writer. */
+    public static void flushAll() throws IOException
+    {
+        for(CSVAnnotation entry : CSVAnnotation.values())
+        {
+            entry.flush();
+        }
+    }
+
+    /** Writes raw text, lazily opening the file and emitting the header row first. */
+    public void write(String contents) throws IOException
+    {
+        if(!isReady) {open(); writeHeaders();}
+
+        writer.write(contents);
+    }
+
+    private void writeHeaders() throws IOException
+    {
+        write(getHeaders());
+    }
+
+    /** @return this entry's header row as a single CSV line (with line terminator). */
+    public String getHeaders()
+    {
+        String[] strings = new String[headers.length];
+
+        for(int h=0; h < headers.length; h++)
+        {
+            strings[h] = headers[h].name();
+        }
+        return getLine(strings);
+    }
+
+    public void writeLine(StringBuilder sb) throws IOException
+    {
+        writeLine(sb.toString());
+    }
+
+    public void writeLine(String ... strings) throws IOException
+    {
+        write(getLine(strings));
+    }
+
+    public void writeLine(String filename_short, int start_idx, int end_idx, int seq, String ... strings) throws IOException
+    {
+        writeLine(filename_short, start_idx, end_idx, seq, true, strings);
+    }
+
+    public void writeLine(String filename_short, int start_idx, int end_idx, String ... strings) throws IOException
+    {
+        writeLine(filename_short, start_idx, end_idx, 0, false, strings);
+    }
+
+    /** Common writeLine implementation; hasSeq controls whether the seq column is emitted. */
+    private void writeLine(String filename_short, int start_idx, int end_idx, int seq, boolean hasSeq, String ... strings) throws IOException
+    {
+        String[] indices = hasSeq?
+                asStrings(start_idx, end_idx, seq) :
+                asStrings(start_idx, end_idx);
+
+        write(getCSV(filename_short).append(DELIM).append(
+                getCSV(indices)).
+                toString());
+
+        if(strings.length>0)
+        {
+            write(DELIM);
+            write(getCSV(strings).toString());
+        }
+
+        write(CRLF);
+    }
+
+
+    /** @return the values joined by DELIM plus the line terminator. */
+    public static String getLine(String... strings)
+    {
+        return getCSV(strings).append(CRLF).toString();
+    }
+
+    public static String[] asStrings(StringBuilder ... builders)
+    {
+        String[] str = new String[builders.length];
+
+        for(int i=0; i<builders.length; i++)
+            str[i] = builders[i].toString();
+
+        return str;
+    }
+
+    public static String[] asStrings(int ... ints)
+    {
+        String[] str = new String[ints.length];
+
+        for(int i=0; i<ints.length; i++)
+            str[i] = String.valueOf(ints[i]);
+
+        return str;
+    }
+
+    public static StringBuilder getCSV(int ... ints)
+    {
+        return getCSV(asStrings(ints));
+    }
+
+    public static StringBuilder getCSV(StringBuilder ... builders)
+    {
+        return getCSV(asStrings(builders));
+    }
+
+    /** Joins the values with DELIM; nulls are rendered as empty fields. */
+    public static StringBuilder getCSV(String... strings)
+    {
+        StringBuilder sb = new StringBuilder();
+
+        for(int i=0; i < strings.length; i++)
+        {
+            String str = strings[i]==null? "":strings[i];
+
+            sb.append(str);
+
+            if(i!=strings.length-1)
+            {
+                sb.append(DELIM);
+            }
+        }
+        return sb;
+    }
+
+    //Part of Speech
+    /** Maps punctuation POS tags to stable symbolic names; "0" when absent. */
+    public static String getPOS(String pos)
+    {
+        if(pos==null || pos.length()==0) return "0";
+
+        if(pos.contains(",")) return "pos_comma";
+        if(pos.contains(".")) return "pos_period";
+        if(pos.contains(":")) return "pos_colon";
+        if(pos.contains("'")) return "pos_tic";
+        if(pos.contains("(")) return "pos_paren";
+        if(pos.contains(")")) return "pos_paren";
+        if(pos.contains("[")) return "pos_paren";
+        if(pos.contains("]")) return "pos_paren";
+
+        return pos;
+    }
+
+    /** Escapes delimiter/quote/line-break characters so covered text stays on one CSV field. */
+    public static String getCoveredText(String text)
+    {
+        return text.replaceAll(DELIM, enclose("csv")).
+                replaceAll(_escape_tic, enclose("tic")).
+                replaceAll(_escape_slash, enclose("slash")).
+                replaceAll(_escape_crlf, enclose("crlf")).
+                replaceAll(CRLF, enclose("crlf"));
+    }
+
+    private static String enclose(String text) {return "("+text+")";}
+
+    public static StringBuilder getCSVBaseToken(String filename_short, BaseToken baseToken) throws IOException
+    {
+        return getCSV(filename_short, baseToken).append(DELIM).
+                append(getCSV(asStrings(baseToken.getTokenNumber()))).append(DELIM).
+                append(getCSV(getPOS(baseToken.getPartOfSpeech())));
+    }
+
+    public static StringBuilder getCSV(String filename_short, Annotation annot) throws IOException
+    {
+        return getCSV(annot.getType().getShortName(), filename_short).append(DELIM).append(
+                getCSV(asStrings(annot.getBegin(), annot.getEnd())));
+    }
+
+    public static File OUTPUT_DIR = new File("csv");
+
+    public static String DELIM = ",";
+    public static String CRLF = System.getProperty("line.separator");
+
+    public static String _escape_tic = "'";
+    public static String _escape_slash = "\\\\";
+    public static String _escape_crlf = "(\\\\r)?\\\\n";
+
+    public static Logger log = Logger.getLogger(CSVAnnotation.class);
+    public static boolean DEBUG = log.isDebugEnabled();
+
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotation.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotationConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotationConsumer.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotationConsumer.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotationConsumer.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,387 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.spin.scrubber.uima.consumer;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.collection.base_cpm.CasObjectProcessor;
+import org.apache.uima.examples.SourceDocumentInformation;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceConfigurationException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.ProcessTrace;
+import org.spin.scrubber.uima.type.Calculation;
+import org.spin.scrubber.uima.type.OntologyMatch;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+import static org.spin.scrubber.uima.consumer.CSVAnnotation.*;
+import static org.spin.scrubber.uima.consumer.CSVAnnotation.getCSV;
+
+public class CSVAnnotationConsumer extends CasConsumer_ImplBase implements CasObjectProcessor
+{
+ public static final Logger log = Logger.getLogger(CSVAnnotationConsumer.class);
+ public static final boolean DEBUG = log.isDebugEnabled();
+
+ public static final String OUTPUT_DIR = "outputFile";
+ public static final String DELIM = ",";
+ public static final String CRLF = System.getProperty("line.separator");
+
+ private File outDir;
+
+ public CSVAnnotationConsumer(){}
+
+ /**
+ * Initializes this CAS Consumer with the parameters specified in the descriptor.
+ *
+ * @throws org.apache.uima.resource.ResourceInitializationException
+ * if there is error in initializing the resources
+ */
+ public void initialize() throws ResourceInitializationException //TODO: refactor
+ {
+ log.debug("initialize()...");
+
+ String oPath = (String) getUimaContext().getConfigParameterValue(OUTPUT_DIR);
+
+ // Output file should be specified in the descriptor
+ if (oPath == null) {
+ throw new ResourceInitializationException(
+ ResourceInitializationException.CONFIG_SETTING_ABSENT, new Object[]{OUTPUT_DIR});
+ }
+
+ // If specified output directory does not exist, try to create it
+ outDir = new File(oPath.trim());
+ if (outDir.getParentFile() != null && !outDir.getParentFile().exists()) {
+ if (!outDir.getParentFile().mkdirs())
+ throw new ResourceInitializationException(
+ ResourceInitializationException.RESOURCE_DATA_NOT_VALID, new Object[]{oPath,OUTPUT_DIR}
+ );
+ }
+
+ log.debug("initialize() is done.");
+ }
+
+
+ /**
+ * Processes the CasContainer which was populated by the TextAnalysisEngines. <br>
+ * In this case, the CAS index is iterated over selected annotations and printed out into an
+ * output file
+ *
+ * @param aCAS CasContainer which has been populated by the TAEs
+ * @throws org.apache.uima.resource.ResourceProcessException
+ * if there is an error in processing the Resource
+ * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
+ */
+ public synchronized void processCas(CAS aCAS) throws ResourceProcessException
+ {
+ JCas jcas;
+
+ try {
+ jcas = aCAS.getJCas();
+ } catch (CASException e) {
+ throw new ResourceProcessException(e);
+ }
+
+ String inFileURI = null;
+ String inFilename = null;
+
+ Iterator iter = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
+
+ if (iter.hasNext())
+ {
+ SourceDocumentInformation srcDocInfo = (SourceDocumentInformation) iter.next();
+ inFileURI = srcDocInfo.getUri();
+
+ String split[] = inFileURI.split("/");
+ inFilename = split[split.length - 1];
+ }
+
+ if(DEBUG)
+ System.out.println(CSVAnnotation.getLine("processCas", inFilename, inFileURI));
+
+ try
+ {
+ processCasAnnotation(jcas, inFilename);
+ {
+ processCasOntologyMatch(jcas, inFilename);
+ }
+
+ processCasBaseToken(jcas, inFilename);
+ {
+ processCasSentence(jcas, inFilename);
+ processCasWordToken(jcas, inFilename);
+ processCasNewlineToken(jcas, inFilename);
+ processCasCalculation(jcas, inFilename);
+ }
+
+ CSVAnnotation.flushAll();
+ }
+ catch(IOException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+
+ private void processCasAnnotation(JCas jcas, String filename_short) throws IOException
+ {
+ processCasAnnotation(jcas, filename_short, _Annotation.type);
+ }
+
+ private void processCasAnnotation(JCas jcas, String filename_short, int annotationIndex) throws IOException
+ {
+ Iterator annotationIter = jcas.getAnnotationIndex(annotationIndex).iterator();
+
+ while(annotationIter.hasNext())
+ {
+ Annotation annot = (Annotation) annotationIter.next();
+
+ if(isEmpty(annot.getCoveredText()))
+ continue;
+
+ if(DEBUG) System.out.println(annot.toString());
+
+ _Annotation.writeLine(getCSV(
+ getCSV(filename_short, annot)));
+ }
+ }
+
+ private void processCasBaseToken(JCas jcas, String filename_short) throws IOException
+ {
+ processCasBaseToken(jcas, filename_short, _BaseToken.type);
+ }
+
+ private void processCasBaseToken(JCas jcas, String filename_short, int annotationIndex) throws IOException
+ {
+ Iterator annotationIter = jcas.getAnnotationIndex(annotationIndex).iterator();
+
+ while(annotationIter.hasNext())
+ {
+ BaseToken annot = (BaseToken) annotationIter.next();
+
+ if(isEmpty(annot.getCoveredText()))
+ continue;
+
+ if(DEBUG) System.out.println(annot.toString());
+
+ _BaseToken.writeLine(getCSV(
+ getCSVBaseToken(filename_short, annot),
+ getCSV(getCoveredText(annot))));
+ }
+ }
+
+ private void processCasSentence(JCas jcas, String filename_short) throws IOException
+ {
+ Iterator iter = jcas.getAnnotationIndex(_Sentence.type).iterator();
+
+ while (iter.hasNext())
+ {
+ Sentence annot = (Sentence) iter.next();
+
+ _Sentence.writeLine(
+ getCSV( getCSV(filename_short, annot),
+ getCSV(annot.getSentenceNumber())));
+
+ _SentenceText.writeLine(annot.getCoveredText());
+ }
+ }
+
+ private void processCasWordToken(JCas jcas, String filename_short) throws IOException
+ {
+ Iterator annotationIter = jcas.getAnnotationIndex(_WordToken.type).iterator();
+
+ while(annotationIter.hasNext())
+ {
+ WordToken annot = (WordToken) annotationIter.next();
+
+ if(isEmpty(annot.getCoveredText()))
+ continue;
+
+ _WordToken.writeLine(
+ getCSV( getCSVBaseToken(filename_short, annot),
+ getCSV(annot.getCapitalization()),
+ getCSV(getCoveredText(annot))));
+ }
+ }
+
+ private void processCasNewlineToken(JCas jcas, String filename_short) throws IOException
+ {
+ Iterator iter = jcas.getAnnotationIndex(_NewLineToken.type).iterator();
+
+ while (iter.hasNext())
+ {
+ NewlineToken annot = (NewlineToken) iter.next();
+
+ _NewLineToken.writeLine(
+ getCSVBaseToken(filename_short, annot)
+ );
+ }
+ }
+
+ private void processCasOntologyMatch(JCas jcas, String filename_short) throws IOException
+ {
+ Iterator iter = jcas.getAnnotationIndex(_OntologyMatch.type).iterator();
+
+ while (iter.hasNext())
+ {
+ OntologyMatch annot = (OntologyMatch) iter.next();
+
+ _OntologyMatch.writeLine(getCSV(
+ getCSV(filename_short, annot),
+ getCSV(annot.getOntology(), annot.getCode(), getCoveredText(annot))));
+ }
+ }
+
+ private void processCasCalculation(JCas jcas, String filename_short) throws IOException
+ {
+ Iterator iter = jcas.getAnnotationIndex(_Calculation.type).iterator();
+
+ while (iter.hasNext())
+ {
+ Calculation annot = (Calculation) iter.next();
+
+ _Calculation.writeLine(getCSV(
+ getCSV(filename_short, annot),
+ getCSV(annot.getCalculationName(), annot.getCalculationValue())));
+ }
+ }
+
+ private String getCoveredText(Annotation annot)
+ {
+ return CSVAnnotation.getCoveredText(annot.getCoveredText());
+ }
+
+ private boolean isEmpty(String coveredText)
+ {
+ coveredText = coveredText.replace('\n', ' ');
+ coveredText = coveredText.replace('\r', ' ');
+
+ return coveredText.trim().length()< 1;
+ }
+
+
+ /**
+ * Called when a batch of processing is completed.
+ *
+ * @param aTrace ProcessTrace object that will log events in this method.
+ * @throws org.apache.uima.resource.ResourceProcessException
+ * if there is an error in processing the Resource
+ * @throws java.io.IOException if there is an IO Error
+ * @see org.apache.uima.collection.CasConsumer#batchProcessComplete(org.apache.uima.util.ProcessTrace)
+ */
+ public void batchProcessComplete(ProcessTrace aTrace) throws ResourceProcessException,
+ IOException {
+ // nothing to do in this case as AnnotationPrinter doesnot do
+ // anything cumulatively
+ }
+
+ /**
+ * Called when the entire collection is completed.
+ *
+ * @param aTrace ProcessTrace object that will log events in this method.
+ * @throws org.apache.uima.resource.ResourceProcessException
+ * if there is an error in processing the Resource
+ * @throws java.io.IOException if there is an IO Error
+ * @see org.apache.uima.collection.CasConsumer#collectionProcessComplete(org.apache.uima.util.ProcessTrace)
+ */
+ public void collectionProcessComplete(ProcessTrace aTrace) throws ResourceProcessException, IOException
+ {
+ CSVAnnotation.closeAll();
+ }
+
+
+ /**
+ * Reconfigures the parameters of this Consumer. <br>
+ * This is used in conjunction with the setConfigurationParameterValue to set the configuration
+ * parameter values to values other than the ones specified in the descriptor.
+ *
+ * @throws org.apache.uima.resource.ResourceConfigurationException
+ * if the configuration parameter settings are invalid
+ * @see org.apache.uima.resource.ConfigurableResource#reconfigure()
+ */
+ public void reconfigure() throws ResourceConfigurationException {
+ super.reconfigure();
+ // extract configuration parameter settings
+ String oPath = (String) getUimaContext().getConfigParameterValue("outputFile");
+ File oFile = new File(oPath.trim());
+ // if output file has changed, close exiting file and open new
+ if (!oFile.equals(this.outDir)) {
+ this.outDir = oFile;
+ try {
+ CSVAnnotation.closeAll();
+
+ // If specified output directory does not exist, try to create it
+ if (oFile.getParentFile() != null && !oFile.getParentFile().exists()) {
+ if (!oFile.getParentFile().mkdirs())
+ throw new ResourceConfigurationException(
+ ResourceInitializationException.RESOURCE_DATA_NOT_VALID, new Object[]{oPath,
+ "outputFile"});
+ }
+
+ } catch (IOException e) {
+ throw new ResourceConfigurationException();
+ }
+ }
+ }
+
+ /**
+ * Called if clean up is needed in case of exit under error conditions.
+ *
+ * @see org.apache.uima.resource.Resource#destroy()
+ */
+ public void destroy()
+ {
+ try
+ {
+ CSVAnnotation.closeAll();
+ }
+ catch(Exception e)
+ {
+ log.error("Could not close writers during destroy()");
+ }
+ }
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/CSVAnnotationConsumer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/JDBCCasConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/JDBCCasConsumer.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/JDBCCasConsumer.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/JDBCCasConsumer.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,474 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/**
+ *
+ */
+package org.spin.scrubber.uima.consumer;
+
+import com.mysql.jdbc.PreparedStatement;
+
+import org.apache.ctakes.typesystem.type.syntax.NumToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.log4j.Logger;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.examples.SourceDocumentInformation;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.ProcessTrace;
+import org.spin.scrubber.uima.dao.BaseDAO;
+import org.spin.scrubber.uima.type.Calculation;
+import org.spin.scrubber.uima.type.KnownPHI;
+import org.spin.scrubber.uima.type.OntologyMatch;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.Iterator;
+
+/**
+ * @author bf19
+ *
+ */
+public class JDBCCasConsumer extends CasConsumer_ImplBase
+{
+ private static Logger log = Logger.getLogger(JDBCCasConsumer.class);
+
+ private static Connection conn;
+ private static String tableName;
+
+ /**
+ * Initializes CAS Consumer with DB params from xml descriptor.
+ *
+ * @throws ResourceInitializationException
+ * if there is error in initializing the resources
+ */
+ public void initialize() throws ResourceInitializationException
+ {
+ // extract configuration parameter settings
+ tableName = (String) getUimaContext().getConfigParameterValue("tableName");
+
+ //if any params are null or empty throw exception
+ if (tableName==null || tableName.equals(""))
+ {
+ throw new ResourceInitializationException(ResourceInitializationException.CONFIG_SETTING_ABSENT, new Object[] { "requires db table name." });
+ }
+
+ try
+ {
+ //TODO: remove?
+// //get db connection info
+// Properties props= new Properties();
+// ClassLoader loader = this.getClass().getClassLoader();
+// InputStream inputStream = loader.getResourceAsStream(PROPERTIES_FILE_NAME);
+// props.load(inputStream);
+//
+// String driver = props.getProperty("JDBC_DRIVER");
+// String connString = props.getProperty("JDBC_CONNECTION_STRING");
+// String user = props.getProperty("DB_USER");
+// String pw = props.getProperty("DB_PW");
+//
+// Class.forName(driver);
+// conn = DriverManager.getConnection(connString, user, pw);
+
+ conn = BaseDAO.getConnectionToScrubber();
+ }
+ catch (Exception e)
+ {
+ e.printStackTrace();
+ log.error("unable to initialize JDBCCasConsumer: " + e.getMessage());
+ throw new ResourceInitializationException(ResourceInitializationException.CONFIG_SETTING_ABSENT, new Object[] { "requires driver, connectionString, user, and pw to connect to db." });
+ }
+ }
+
+ /**
+ * much of this method was cannibalized from Apache's AnnotationPrinter.java
+ */
+ public synchronized void processCas(CAS aCAS) throws ResourceProcessException
+ {
+ JCas jcas;
+ try
+ {
+ jcas = aCAS.getJCas();
+ }
+ catch (CASException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+
+ String filenameLong = null;
+ String filenameShort = null;
+ Iterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
+ if (it.hasNext())
+ {
+ SourceDocumentInformation srcDocInfo = (SourceDocumentInformation) it.next();
+ filenameLong = srcDocInfo.getUri();
+ filenameShort = filenameLong.substring(filenameLong.lastIndexOf("/")+1);
+ }
+
+ //iterate and records annotations
+ processKnownPHI(filenameLong, filenameShort, jcas);
+ processWordToken(filenameLong, filenameShort, jcas);
+ processOntologyMatch(filenameLong, filenameShort, jcas);
+ processNumToken(filenameLong, filenameShort, jcas);
+ processCalculation(filenameLong, filenameShort, jcas);
+
+ }
+
+ private void processNumToken(String filenameLong, String filenameShort, JCas jcas) throws ResourceProcessException
+ {
+ //we only want to record the 'NumToken' annotations...
+ Iterator annotationIter = jcas.getAnnotationIndex(NumToken.type).iterator();
+ while (annotationIter.hasNext())
+ {
+ Annotation annot = (Annotation) annotationIter.next();
+
+ // get the text that is enclosed within the annotation in the CAS
+ String token = annot.getCoveredText();
+ token = token.replace('\n', ' ');
+ token = token.replace('\r', ' ');
+
+ //if aText is empty, continue;
+ if (token.trim().length()<1)
+ {
+ continue;
+ }
+ //if token length > 100, trim it down
+ if (token.length()>100)
+ {
+ log.info("INFO: trimming token: " + token + "(" + token.length() + ")");
+ token = token.substring(0, 100);
+ }
+
+ //get Features... //TODO: there must be a better way to do this...
+ Feature posFeat=null;
+ String posTag=null;
+ try
+ {
+ String featName = (String) getUimaContext().getConfigParameterValue("pos");
+ posFeat = annot.getCAS().getTypeSystem().getFeatureByFullName(featName);
+ if (posFeat!=null)
+ posTag = annot.getFeatureValueAsString(posFeat);
+ }
+ catch(Exception e)
+ {
+ log.warn("unable to get pos feature value. may be null. continuing.", e);
+ }
+
+ try
+ {
+ insert(filenameLong, filenameShort, annot.getType().getShortName(), annot.getType().getName(), token, annot.getBegin(), annot.getEnd(), posTag, "pos");
+ }
+ catch(SQLException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+ }
+ private void processOntologyMatch(String filenameLong, String filenameShort, JCas jcas) throws ResourceProcessException
+ {
+ //we only want to record the 'OntologyMatch' annotations...
+ Iterator annotationIter = jcas.getAnnotationIndex(OntologyMatch.type).iterator();
+ while (annotationIter.hasNext())
+ {
+ OntologyMatch annot = (OntologyMatch) annotationIter.next();
+
+ // get the text that is enclosed within the annotation in the CAS
+ String token = "";
+
+ try
+ {
+ token = annot.getCoveredText();
+ }
+ catch(StringIndexOutOfBoundsException e)
+ {
+ e.printStackTrace();
+ System.out.println("ERROR: index error on file: " + filenameShort + " position: " + annot.getBegin() +"-"+ annot.getEnd());
+ continue;
+ }
+ token = token.replace('\n', ' ');
+ token = token.replace('\r', ' ');
+
+ //if aText is empty, continue;
+ if (token.trim().length()<1)
+ {
+ continue;
+ }
+ //if token length > 100, trim it down
+ if (token.length()>100)
+ {
+ System.out.println("INFO: trimming token: " + token + "(" + token.length() + ")");
+ token = token.substring(0, 100);
+ }
+
+ String code = annot.getCode();
+ String ontology = annot.getOntology();
+
+ //trim code if its too long
+ if (code.length()>50)
+ {
+ code = code.substring(0,50);
+ }
+
+ //trim ontology if its too long
+ if (ontology.length()>50)
+ {
+ ontology = ontology.substring(0,50);
+ }
+
+ try
+ {
+ insert(filenameLong, filenameShort, annot.getType().getShortName(), annot.getType().getName(), token, annot.getBegin(), annot.getEnd(), code, ontology);
+ }
+ catch(SQLException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+ }
+
+ private void processKnownPHI(String filenameLong, String filenameShort, JCas jcas) throws ResourceProcessException
+ {
+ //we only want to record the 'KnownPHI' annotations...
+ Iterator annotationIter = jcas.getAnnotationIndex(KnownPHI.type).iterator();
+ while (annotationIter.hasNext())
+ {
+ KnownPHI annot = (KnownPHI) annotationIter.next();
+
+ // get the text that is enclosed within the annotation in the CAS
+ // NOTE: for knownPHI type the covered text is in getContent().
+ // this is because knownPHI annots are made by the reader, not an annotator, this should be fixed in the future.
+ String token = annot.getContent();
+ token = token.replace('\n', ' ');
+ token = token.replace('\r', ' ');
+
+ //if aText is empty, continue;
+ if (token.trim().length()<1)
+ {
+ continue;
+ }
+ //if token length > 100, trim it down
+ if (token.length()>100)
+ {
+ System.out.println("INFO: trimming token: " + token + "(" + token.length() + ")");
+ token = token.substring(0, 100);
+ }
+
+ String code = annot.getCode();
+ String ontology = annot.getOntology();
+
+ //trim code if its too long
+ if (code.length()>50)
+ {
+ code = code.substring(0,50);
+ }
+
+ //trim ontology if its too long
+ if (ontology.length()>50)
+ {
+ ontology = ontology.substring(0,50);
+ }
+
+ try
+ {
+ insert(filenameLong, filenameShort, annot.getType().getShortName(), annot.getType().getName(), token, annot.getBegin(), annot.getEnd(), code, ontology);
+ }
+ catch(SQLException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+ }
+
+ private void processCalculation(String filenameLong, String filenameShort, JCas jcas) throws ResourceProcessException
+ {
+ //we only want to record the 'Calculation' annotations...
+ Iterator annotationIter = jcas.getAnnotationIndex(Calculation.type).iterator();
+ while (annotationIter.hasNext())
+ {
+ Calculation annot = (Calculation) annotationIter.next();
+
+ // get the text that is enclosed within the annotation in the CAS
+ String token = annot.getCoveredText();
+ token = token.replace('\n', ' ');
+ token = token.replace('\r', ' ');
+
+ //if aText is empty, continue;
+ if (token.trim().length()<1)
+ {
+ continue;
+ }
+ //if token length > 100, trim it down
+ if (token.length()>100)
+ {
+ log.info("trimming token: " + token + "(" + token.length() + ")");
+ token = token.substring(0, 100);
+ }
+
+ String calcName = annot.getCalculationName();
+ String calcValue = annot.getCalculationValue();
+
+ //trim calcName if its too long
+ if (calcName.length()>50)
+ {
+ calcName = calcName.substring(0,50);
+ }
+
+ try
+ {
+ insert(filenameLong, filenameShort, annot.getType().getShortName(), annot.getType().getName(), token, annot.getBegin(), annot.getEnd(), calcValue, calcName);
+ }
+ catch(SQLException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+ }
+
+ private void processWordToken(String filenameLong, String filenameShort, JCas jcas) throws ResourceProcessException
+ {
+ //we only want to record the 'WordToken' annotations...
+ Iterator annotationIter = jcas.getAnnotationIndex(WordToken.type).iterator();
+ while (annotationIter.hasNext())
+ {
+ Annotation annot = (Annotation) annotationIter.next();
+
+ // get the text that is enclosed within the annotation in the CAS
+ String token = annot.getCoveredText();
+ token = token.replace('\n', ' ');
+ token = token.replace('\r', ' ');
+
+ //if aText is empty, continue;
+ if (token.trim().length()<1)
+ {
+ continue;
+ }
+ //if token length > 100, trim it down
+ if (token.length()>100)
+ {
+ log.info("INFO: trimming token: " + token + "(" + token.length() + ")");
+ token = token.substring(0, 100);
+ }
+
+ //get Features... //TODO: there must be a better way to do this...
+ Feature posFeat=null;
+ String posTag=null;
+ try
+ {
+ String featName = (String) getUimaContext().getConfigParameterValue("pos");
+ posFeat = annot.getCAS().getTypeSystem().getFeatureByFullName(featName);
+ if (posFeat!=null)
+ posTag = annot.getFeatureValueAsString(posFeat);
+ }
+ catch(Exception e)
+ {
+ log.warn("unable to get POS feature declared in consumer.xml. may be null. continuing.");
+ e.printStackTrace();
+ }
+
+ //get Features... //TODO: there must be a better way to do this...
+ Feature capFeat=null;
+ String capTag=null;
+ try
+ {
+ String capName = (String) getUimaContext().getConfigParameterValue("capitalization");
+ capFeat = annot.getCAS().getTypeSystem().getFeatureByFullName(capName);
+ if (capFeat!=null)
+ capTag = annot.getFeatureValueAsString(capFeat);
+ }
+ catch(Exception e)
+ {
+ log.warn("unable to get CAP feature declared in consumer.xml. may be null. continuing.");
+ e.printStackTrace();
+ }
+
+ try
+ {
+ //insert pos & capitalization features.
+ insert(filenameLong, filenameShort, annot.getType().getShortName(), annot.getType().getName(), token, annot.getBegin(), annot.getEnd(), posTag, "pos");
+ insert(filenameLong, filenameShort, annot.getType().getShortName(), annot.getType().getName(), token, annot.getBegin(), annot.getEnd(), capTag, "cap");
+ }
+ catch(SQLException e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+ }
+
+ private void insert(String filenameLong, String filenameShort, String annot_type_short, String annot_type_long, String token, int start_idx, int end_idx, String match_value, String match_source) throws SQLException
+ {
+ PreparedStatement ps = null;
+ int updated = 0;
+ StringBuilder sql = new StringBuilder("insert into "+tableName+" (filename_long, filename_short, annot_type_short, annot_type_long, token, start_idx, end_idx, match_value, match_source) values (?,?,?,?,?,?,?,?,?);");
+
+ try
+ {
+ ps = (PreparedStatement) conn.prepareStatement(sql.toString());
+ int i=1;
+ ps.setString(i++, filenameLong);
+ ps.setString(i++, filenameShort);
+ ps.setString(i++, annot_type_short);
+ ps.setString(i++, annot_type_long);
+ ps.setString(i++, token);
+ ps.setInt(i++, start_idx);
+ ps.setInt(i++, end_idx);
+ ps.setString(i++, match_value);
+ ps.setString(i++, match_source);
+
+ updated = ps.executeUpdate();
+
+ if(updated!=1) { throw new SQLException("ERROR: db insert count="+updated + ". expected=1"); }
+ }
+ catch (SQLException e)
+ {
+ log.error("processing CAS in JDBCCasConsumer: " + e.getMessage());
+ throw e;
+ }
+ finally
+ {
+ if (ps!=null)
+ {
+ ps.close();
+ }
+ }
+ }
+
+ /**
+ * close db connection.
+ */
+ public void collectionProcessComplete(ProcessTrace arg0) throws ResourceProcessException,IOException
+ {
+ try
+ {
+ if (conn!=null)
+ {
+ conn.close();
+ }
+ }
+ catch (Exception e)
+ {
+ e.printStackTrace();
+ log.error("closing DB connection for JDBCCasConsumer - SWALLOWING: " + e.getMessage());
+ }
+ }
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/consumer/JDBCCasConsumer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/ReferenceTextStripper.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/ReferenceTextStripper.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/ReferenceTextStripper.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/ReferenceTextStripper.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,232 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/**
+ *
+ */
+package org.spin.scrubber.uima.core;
+
+import org.apache.log4j.Logger;
+import org.spin.scrubber.ScrubberProperties;
+
+import java.io.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * @author bf19
+ */
+public class ReferenceTextStripper implements Runnable
+{
+ private static Logger log = Logger.getLogger(ReferenceTextStripper.class);
+
+ public static final String REGEX_ET_AL="(\\b[A-Z][A-Za-z-]*(\\.)?(\\s+))+([E|e][T|t]\\s+[A|a][L|l])";
+
+ private String dirInputPublicationsTXT;
+ private String dirInputPublicationsProcessed;
+
+ public ReferenceTextStripper()
+ {
+ this( ScrubberProperties.getDirInputPublicationsTXT(),
+ ScrubberProperties.getDirInputPublicationsProcessed()
+ );
+ }
+
+ public ReferenceTextStripper(String dirInputPublicationsTXT, String dirInputPublicationsProcessed)
+ {
+ this.dirInputPublicationsTXT = dirInputPublicationsTXT;
+ this.dirInputPublicationsProcessed = dirInputPublicationsProcessed;
+ }
+
+ /**
+ * @param args
+ * @throws Exception
+ */
+ public static void main(String[] args) throws Exception
+ {
+ ReferenceTextStripper stripper = new ReferenceTextStripper();
+ stripper.run();
+ }
+
+ public void run()
+ {
+ try
+ {
+ File inDir = new File(dirInputPublicationsTXT);
+
+ if (!inDir.exists())
+ {
+ inDir.createNewFile();
+ }
+
+ File[] files = inDir.listFiles();
+ int i=1;
+ for (File f : files)
+ {
+ if (f.isDirectory())
+ {
+ log.debug("SKIPPING File - isDirectory: " + i++ + " - " + f.getName() + "\t");
+ continue;
+ }
+
+ String fname = f.getName();
+
+ //make new outfile
+ File newFile = new File(dirInputPublicationsProcessed + File.separatorChar + fname);
+ if (newFile.exists())
+ {
+ log.debug("SKIPPING File - already exists: " + i + " - " + newFile.getName() + "\t") ;
+ continue;
+ }
+
+ log.debug("File: " + i++ + " - " + fname + "\t") ;
+
+ //read infile
+ String strLine;
+ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
+ StringBuffer buff = new StringBuffer();
+ String content;
+
+ while ((strLine = br.readLine()) != null)
+ {
+ buff.append(strLine);
+ buff.append("\n");
+ }
+
+ content = buff.toString();
+
+ FileWriter writer = new FileWriter(newFile);
+
+ //strip references - substr from start to token 'References'
+// content = stripReferencesSection(content);
+
+ //strip "<Token(s)> et al." - gets a series of init case tokens followed by "et al"
+ //TODO: maybe also strip (*<Token(s)> et al.*) to get cases like:
+ //"(Grindlay, 1997; Ono et al., 2001; Hirose, 2005; Terashima et al., 2005; Niinemets, 2007)"
+ //"(Walters et al., 1999; Weaver and Amasino, 2001)"
+ content = stripInLineReferences(content);
+
+ //TODO: strip pub authors & institutions - there has got to be a better way than this...
+// content = stripPubAuthors(content);
+
+ //write new contents.
+ writer.write(content);
+ writer.flush();
+
+ //wrap up
+ writer.close();
+ br.close();
+ buff=null;
+ }
+ }
+ catch(Exception e)
+ {
+ log.error("Unknown error during reference text stripping", e);
+ }
+ }
+
+ private String stripInLineReferences(String content)
+ {
+ try
+ {
+ Pattern p = Pattern.compile(REGEX_ET_AL);
+ Matcher m = p.matcher(content);
+
+ while (m.find())
+ {
+ String matched = m.group();
+
+ System.out.println("Stripping inline reference: "+matched);
+
+ // remove any reg ex special chars
+ String[] specialChars = { "\\", "[", "{", "}", "[", "]", "$", "^", "|", "(", ")", "*", "?", "+", "]", ".", "/"};
+
+ String escaped = matched;
+
+ for (String specialChar : specialChars) {
+ escaped = escaped.replaceAll("([^\\" + specialChar + "]*)(\\" + specialChar + ")([^\\" + specialChar + "]*)", "$1\\\\$2$3");
+ }
+
+ escaped = "(\\W|\\b)" + escaped + "(\\W|\\b)";
+
+ content = content.replaceAll(escaped, " AUTHOR et al.");
+ }
+
+ return content;
+ }
+ catch(Exception e)
+ {
+ e.printStackTrace();
+ }
+
+ //if method fails to match regex, just return empty string.
+ return "";
+
+// try
+// {
+// return content.replaceAll(REGEX_ET_AL, " AUTHOR et al.");
+// }
+// catch(Exception e)
+// {
+// e.printStackTrace();
+// }
+//
+// //if method fails to match regex, just return empty string.
+// return "";
+ }
+
+ /**
+ * over cautious stripping. if for some reason the token 'references' only existed as a word in the body
+ * and not a section heading, this would make the pub basically useless as it would strip most of the content.
+ * @param content
+ * @return
+ */
+ @Deprecated
+ private String stripReferencesSection(String content)
+ {
+ if (content!=null && content.toLowerCase().contains("references"))
+ {
+ //System.out.println("Found REFERENCES section. Stripping it...");
+ return content.substring(0, content.toLowerCase().lastIndexOf("references"));
+ }
+ else
+ {
+ return content;
+ }
+ }
+
+ /**
+ * over cautious stripping. if pub did not have the standard IMRD sections, this may erroneously cut more than needed.
+ * @param content
+ * @return
+ */
+ @Deprecated
+ private String stripPubAuthors(String content)
+ {
+ if (content!=null && content.toLowerCase().contains("introduction"))
+ {
+ //System.out.println("Found REFERENCES section. Stripping it...");
+ return content.substring(content.toLowerCase().indexOf("introduction"));
+ }
+ else
+ {
+ return content;
+ }
+ }
+
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/ReferenceTextStripper.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/UIMARunner.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/UIMARunner.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/UIMARunner.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/UIMARunner.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,97 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/**
+ *
+ */
+package org.spin.scrubber.uima.core;
+
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.collection.CollectionProcessingEngine;
+import org.apache.uima.collection.metadata.CpeDescription;
+import org.apache.uima.util.XMLInputSource;
+
+import java.io.File;
+
+/**
+ * Thin wrapper that parses a UIMA CPE (Collection Processing Engine)
+ * descriptor and runs the resulting engine to completion.
+ *
+ * @author britt fitch
+ * @author Andrew McMurry
+ *
+ */
+public class UIMARunner implements Runnable
+{
+	private static Logger log = Logger.getLogger(UIMARunner.class);
+	private String cpe;
+
+	/**
+	 * @param cpe path to the CPE descriptor XML file
+	 */
+	public UIMARunner(String cpe)
+	{
+		log.debug("CPE file: "+ cpe);
+
+		this.cpe = cpe;
+	}
+
+	/**
+	 * Command-line entry point.
+	 *
+	 * @param args args[0] is the CPE descriptor path
+	 * @throws Exception on unrecoverable startup failure
+	 */
+	public static void main(String[] args) throws Exception
+	{
+		log.info("Running UIMA CPE Pipeline (Collection processing engine)...");
+
+		// Guard clause instead of if/else nesting.
+		if (args.length < 1)
+		{
+			System.out.println("You must provide a CPE file. See UIMA documentation for help.");
+			return;
+		}
+
+		new UIMARunner(args[0]).run();
+	}
+
+	/**
+	 * Parses the descriptor, instantiates the engine, and processes the
+	 * collection.  Errors are logged rather than propagated.
+	 */
+	public void run()
+	{
+		try
+		{
+			File descriptorFile = new File(cpe);
+
+			if (!descriptorFile.exists())
+			{
+				System.out.println("CPE file does not exist: "+ descriptorFile.getAbsolutePath());
+				return;
+			}
+
+			log.debug("Parsing CPE descriptor from "+ descriptorFile.getAbsolutePath());
+			CpeDescription descriptor = UIMAFramework.getXMLParser().parseCpeDescription(new XMLInputSource(descriptorFile));
+
+			log.debug("Instantiating CPE...");
+			CollectionProcessingEngine engine = UIMAFramework.produceCollectionProcessingEngine(descriptor);
+
+			log.debug("Starting process...");
+			engine.process();
+
+			log.debug("UIMA step complete...");
+		}
+		catch(Exception e)
+		{
+			log.error("Encountered error while executing UIMARunner.", e);
+		}
+	}
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/core/UIMARunner.java
------------------------------------------------------------------------------
svn:mime-type = text/plain