You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by br...@apache.org on 2013/07/07 21:23:07 UTC
svn commit: r1500511 [6/6] - in /ctakes/sandbox/ctakes-scrubber-deid/src: ./
main/ main/java/ main/java/org/ main/java/org/apache/
main/java/org/apache/uima/ main/java/org/apache/uima/examples/
main/java/org/spin/ main/java/org/spin/scrubber/ main/java...
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/reader/FileSystemCollectionReaderXML.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/reader/FileSystemCollectionReaderXML.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/reader/FileSystemCollectionReaderXML.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/reader/FileSystemCollectionReaderXML.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,379 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.spin.scrubber.uima.reader;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.collection.CollectionReader_ImplBase;
+import org.apache.uima.examples.SourceDocumentInformation;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.DocumentAnnotation;
+import org.apache.uima.resource.ResourceConfigurationException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.FileUtils;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import org.spin.scrubber.uima.type.KnownPHI;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathFactory;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * A simple collection reader that reads documents from a directory in the filesystem. It can be
+ * configured with the following parameters:
+ * <ul>
+ * <li><code>InputDirectory</code> - path to directory containing files</li>
+ * <li><code>Encoding</code> (optional) - character encoding of the input files</li>
+ * <li><code>Language</code> (optional) - language of the input documents</li>
+ * </ul>
+ *
+ *
+ */
+public class FileSystemCollectionReaderXML extends CollectionReader_ImplBase {
+ /**
+ * Name of configuration parameter that must be set to the path of a directory containing input
+ * files.
+ */
+ public static final String PARAM_INPUTDIR = "InputDirectory";
+
+ /**
+ * Name of configuration parameter that contains the character encoding used by the input files.
+ * If not specified, the default system encoding will be used.
+ */
+ public static final String PARAM_ENCODING = "Encoding";
+
+ /**
+ * Name of optional configuration parameter that contains the language of the documents in the
+ * input directory. If specified this information will be added to the CAS.
+ */
+ public static final String PARAM_LANGUAGE = "Language";
+
+ /**
+ * Name of optional configuration parameter that indicates including
+ * the subdirectories (recursively) of the current input directory.
+ */
+ public static final String PARAM_SUBDIR = "BrowseSubdirectories";
+
+ private String[] scrubNodeList;
+ private String[] knownPHINodeList; //TODO: add knownPHI to another view to be handled later in the pipeline. possibly by consumers, classifiers, or redactors
+ private ArrayList<File> mFiles;
+ private String mEncoding;
+ private String mLanguage;
+ private Boolean mRecursive;
+ private int mCurrentIndex ;
+
+ /**
+ * @see org.apache.uima.collection.CollectionReader_ImplBase#initialize()
+ */
+ public void initialize() throws ResourceInitializationException
+ {
+ scrubNodeList = (String[])getConfigParameterValue("ScrubNodeList");
+ knownPHINodeList = (String[])getConfigParameterValue("KnownPHINodeList");
+ File directory = new File(((String) getConfigParameterValue(PARAM_INPUTDIR)).trim());
+ mEncoding = (String) getConfigParameterValue(PARAM_ENCODING);
+ mLanguage = (String) getConfigParameterValue(PARAM_LANGUAGE);
+ mRecursive = (Boolean) getConfigParameterValue(PARAM_SUBDIR);
+ if (null == mRecursive) { // could be null if not set, it is optional
+ mRecursive = Boolean.FALSE;
+ }
+ mCurrentIndex = 0;
+
+ // if input directory does not exist or is not a directory, throw exception
+ if (!directory.exists() || !directory.isDirectory()) {
+ throw new ResourceInitializationException(ResourceConfigurationException.DIRECTORY_NOT_FOUND,
+ new Object[] { PARAM_INPUTDIR, this.getMetaData().getName(), directory.getPath() });
+ }
+
+ // get list of files in the specified directory, and subdirectories if the
+ // parameter PARAM_SUBDIR is set to True
+ mFiles = new ArrayList<File>();
+ addFilesFromDir(directory);
+ }
+
+ /**
+ * This method adds files in the directory passed in as a parameter to mFiles.
+ * If mRecursive is true, it will include all files in all
+ * subdirectories (recursively), as well.
+ *
+ * @param dir
+ */
+ private void addFilesFromDir(File dir) {
+ File[] files = dir.listFiles();
+ for (int i = 0; i < files.length; i++) {
+ if (!files[i].isDirectory()) {
+ mFiles.add(files[i]);
+ } else if (mRecursive) {
+ addFilesFromDir(files[i]);
+ }
+ }
+ }
+
+ /**
+ * @see org.apache.uima.collection.CollectionReader#hasNext()
+ */
+ public boolean hasNext() {
+ return mCurrentIndex < mFiles.size();
+ }
+
+ /**
+ * @see org.apache.uima.collection.CollectionReader#getNext(org.apache.uima.cas.CAS)
+ */
+ public void getNext(CAS aCAS) throws IOException, CollectionException {
+ JCas jcas;
+ try {
+ jcas = aCAS.getJCas();
+ } catch (CASException e) {
+ throw new CollectionException(e);
+ }
+
+ // open input stream to file
+ File file = (File) mFiles.get(mCurrentIndex++);
+ String text = FileUtils.file2String(file, mEncoding);
+
+ //process xml header for KnownPHI fields. preserves the original character positions, but removes the XML tags
+ xpathHeader(jcas, file, text);
+
+ //build a representation of the file that preserves the original character positions, but removes the XML tags
+ text = xpathContent(file, text);
+
+ // put document in CAS
+ jcas.setDocumentText(text);
+
+ // set language if it was explicitly specified as a configuration parameter
+ if (mLanguage != null) {
+ ((DocumentAnnotation) jcas.getDocumentAnnotationFs()).setLanguage(mLanguage);
+ }
+
+ // Also store location of source document in CAS. This information is critical
+ // if CAS Consumers will need to know where the original document contents are located.
+ // For example, the Semantic Search CAS Indexer writes this information into the
+ // search index that it creates, which allows applications that use the search index to
+ // locate the documents that satisfy their semantic queries.
+ SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
+ srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
+ srcDocInfo.setOffsetInSource(0);
+ srcDocInfo.setDocumentSize((int) file.length());
+ srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
+ srcDocInfo.addToIndexes();
+ }
+
+ /**
+ * build a representation of the file that preserves the original character positions, but removes the XML tags
+ * @param file input file
+ * @param text xml representation of the file contents
+ * @return String of transformed file content.
+ */
+ private String xpathContent(File file, String text)
+ {
+ //init buffer to the size of the original document
+ //just supplying initial capacity does not appear to actually populate the buff with anything. (len = 0)
+ String initLen = "";
+ for (int i=0; i<text.length(); i++)
+ {
+ initLen+=" ";
+ }
+ StringBuilder buff = new StringBuilder(initLen);
+
+ try
+ {
+ //read infile
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ DocumentBuilder builder = factory.newDocumentBuilder();
+ Document doc = builder.parse(file);
+
+ //START iterate over dom, update phi tags with start&end attribs.
+ Element root = doc.getDocumentElement();
+ XPathFactory xPathfactory1 = XPathFactory.newInstance();
+ XPath xpath1 = xPathfactory1.newXPath();
+
+ for (String xpathString : scrubNodeList)
+ {
+
+ String nodeName = xpathString.substring(xpathString.lastIndexOf("/")+1);
+ XPathExpression expr = xpath1.compile(xpathString);
+
+ //read all matching nodes
+ NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
+
+ int contentIdx = -1;
+ int nodeIdx = -1;
+ //for each RECORD node in a file
+ for (int i=0; i<nodes.getLength(); i++)
+ {
+ Node n = nodes.item(i);
+ String content = n.getTextContent();
+
+ //get the idx of the node & the content of that node.
+ nodeIdx = text.indexOf("<"+nodeName, nodeIdx);
+ contentIdx = text.indexOf(">", nodeIdx)+1;
+
+ //see if this content is wrapped in CDATA, if so, move the idx to account for this.
+ String subsection = text.substring(contentIdx, contentIdx+9);
+ if (subsection.equalsIgnoreCase("<![CDATA["))
+ {
+ contentIdx += 9;
+ }
+
+ //remove placeholder and add content at the appropriate place from the original xml input
+ buff.delete(contentIdx, contentIdx+content.length());
+ buff.insert(contentIdx, content);
+
+ //update node idx incase multiple nodes w/ same name.
+ nodeIdx = contentIdx+content.length();
+ }
+ }
+ }
+ catch (Exception e)
+ {
+ System.out.println(e.getMessage());
+ e.printStackTrace();
+ }
+
+ //remove trailing spaces
+ return buff.toString().replaceAll("\\s+$", "");
+ }
+
+ /**
+ * process xml header and store annots for knownPHI. preserves the original character positions.
+ * @param file input file
+ * @param text xml representation of the file contents
+ * @return String of transformed file content.
+ */
+ private void xpathHeader(JCas jcas, File file, String text)
+ {
+ try
+ {
+ //read infile
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ DocumentBuilder builder = factory.newDocumentBuilder();
+ Document doc = builder.parse(file);
+
+ //START iterate over dom, update phi tags with start&end attribs.
+ Element root = doc.getDocumentElement();
+ XPathFactory xPathfactory1 = XPathFactory.newInstance();
+ XPath xpath1 = xPathfactory1.newXPath();
+
+ for (String xpathString : knownPHINodeList)
+ {
+
+ String nodeName = xpathString.substring(xpathString.lastIndexOf("/")+1);
+ XPathExpression expr = xpath1.compile(xpathString);
+
+ //read all matching nodes
+ NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
+
+ int contentIdx = -1;
+ int nodeIdx = -1;
+ //for each RECORD node in a file
+ for (int i=0; i<nodes.getLength(); i++)
+ {
+ Node n = nodes.item(i);
+ String content = n.getTextContent();
+
+ if (content==null || content.length()<1)
+ {
+ continue;
+ }
+
+ //get the idx of the node & the content of that node.
+ nodeIdx = text.indexOf("<"+nodeName, nodeIdx);
+ contentIdx = text.indexOf(">", nodeIdx)+1;
+
+ //see if this content is wrapped in CDATA, if so, move the idx to account for this.
+ String subsection = text.substring(contentIdx, contentIdx+9);
+ if (subsection.equalsIgnoreCase("<![CDATA["))
+ {
+ contentIdx += 9;
+ }
+
+ KnownPHI phi = new KnownPHI(jcas);
+ phi.setBegin(contentIdx);
+ phi.setEnd(contentIdx+content.length());
+ phi.setCode(nodeName);
+ phi.setOntology("knownPHI");
+ phi.setContent(content);
+ phi.addToIndexes();
+
+ //update node idx incase multiple nodes w/ same name.
+ nodeIdx = contentIdx+content.length();
+ }
+ }
+ }
+ catch (Exception e)
+ {
+ System.out.println(e.getMessage());
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#close()
+ */
+ public void close() throws IOException {
+ }
+
+ /**
+ * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#getProgress()
+ */
+ public Progress[] getProgress() {
+ return new Progress[] { new ProgressImpl(mCurrentIndex, mFiles.size(), Progress.ENTITIES) };
+ }
+
+ /**
+ * Gets the total number of documents that will be returned by this collection reader. This is not
+ * part of the general collection reader interface.
+ *
+ * @return the number of documents in the collection
+ */
+ public int getNumberOfDocuments() {
+ return mFiles.size();
+ }
+
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/reader/FileSystemCollectionReaderXML.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,114 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/* First created by JCasGen Fri Jan 27 15:07:47 EST 2012 */
+package org.spin.scrubber.uima.type;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JCasRegistry;
+import org.apache.uima.jcas.cas.TOP_Type;
+
+import org.apache.uima.jcas.tcas.Annotation;
+
+
+/**
+ * Updated by JCasGen Fri Jan 27 15:07:47 EST 2012
+ * XML source: C:/dev/scrubber-pipeline/desc/type/CalculationTypeSystem.xml
+ * @generated */
+public class Calculation extends Annotation {
+ /** @generated
+ * @ordered
+ */
+ public final static int typeIndexID = JCasRegistry.register(Calculation.class);
+ /** @generated
+ * @ordered
+ */
+ public final static int type = typeIndexID;
+ /** @generated */
+ public int getTypeIndexID() {return typeIndexID;}
+
+ /** Never called. Disable default constructor
+ * @generated */
+ protected Calculation() {}
+
+ /** Internal - constructor used by generator
+ * @generated */
+ public Calculation(int addr, TOP_Type type) {
+ super(addr, type);
+ readObject();
+ }
+
+ /** @generated */
+ public Calculation(JCas jcas) {
+ super(jcas);
+ readObject();
+ }
+
+ /** @generated */
+ public Calculation(JCas jcas, int begin, int end) {
+ super(jcas);
+ setBegin(begin);
+ setEnd(end);
+ readObject();
+ }
+
+ /** <!-- begin-user-doc -->
+ * Write your own initialization here
+ * <!-- end-user-doc -->
+ @generated modifiable */
+ private void readObject() {}
+
+
+
+ //*--------------*
+ //* Feature: calculationName
+
+ /** getter for calculationName - gets
+ * @generated */
+ public String getCalculationName() {
+ if (Calculation_Type.featOkTst && ((Calculation_Type)jcasType).casFeat_calculationName == null)
+ jcasType.jcas.throwFeatMissing("calculationName", "org.spin.scrubber.uima.type.Calculation");
+ return jcasType.ll_cas.ll_getStringValue(addr, ((Calculation_Type)jcasType).casFeatCode_calculationName);}
+
+ /** setter for calculationName - sets
+ * @generated */
+ public void setCalculationName(String v) {
+ if (Calculation_Type.featOkTst && ((Calculation_Type)jcasType).casFeat_calculationName == null)
+ jcasType.jcas.throwFeatMissing("calculationName", "org.spin.scrubber.uima.type.Calculation");
+ jcasType.ll_cas.ll_setStringValue(addr, ((Calculation_Type)jcasType).casFeatCode_calculationName, v);}
+
+
+ //*--------------*
+ //* Feature: calculationValue
+
+ /** getter for calculationValue - gets
+ * @generated */
+ public String getCalculationValue() {
+ if (Calculation_Type.featOkTst && ((Calculation_Type)jcasType).casFeat_calculationValue == null)
+ jcasType.jcas.throwFeatMissing("calculationValue", "org.spin.scrubber.uima.type.Calculation");
+ return jcasType.ll_cas.ll_getStringValue(addr, ((Calculation_Type)jcasType).casFeatCode_calculationValue);}
+
+ /** setter for calculationValue - sets
+ * @generated */
+ public void setCalculationValue(String v) {
+ if (Calculation_Type.featOkTst && ((Calculation_Type)jcasType).casFeat_calculationValue == null)
+ jcasType.jcas.throwFeatMissing("calculationValue", "org.spin.scrubber.uima.type.Calculation");
+ jcasType.ll_cas.ll_setStringValue(addr, ((Calculation_Type)jcasType).casFeatCode_calculationValue, v);}
+ }
+
+
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation_Type.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation_Type.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation_Type.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation_Type.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,118 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/* First created by JCasGen Fri Jan 27 15:07:47 EST 2012 */
+package org.spin.scrubber.uima.type;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JCasRegistry;
+import org.apache.uima.cas.impl.CASImpl;
+import org.apache.uima.cas.impl.FSGenerator;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.impl.TypeImpl;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.impl.FeatureImpl;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.jcas.tcas.Annotation_Type;
+
+/**
+ * JCas type class that accompanies {@link Calculation}: it registers the generator that creates
+ * Calculation instances and gives low-level, address-based access to the calculationName and
+ * calculationValue features.
+ * Updated by JCasGen Fri Jan 27 15:07:47 EST 2012
+ * @generated */
+public class Calculation_Type extends Annotation_Type {
+  /** Fully qualified type name passed to throwFeatMissing. */
+  private static final String TYPE_NAME = "org.spin.scrubber.uima.type.Calculation";
+
+  /** @generated */
+  public final static int typeIndexID = Calculation.typeIndexID;
+  /** @generated
+     @modifiable */
+  public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.spin.scrubber.uima.type.Calculation");
+
+  /** Creates the Calculation object for a CAS address, reusing a cached one when enabled.
+   * @generated */
+  private final FSGenerator fsGenerator =
+    new FSGenerator() {
+      public FeatureStructure createFS(int addr, CASImpl cas) {
+        if (!Calculation_Type.this.useExistingInstance) {
+          return new Calculation(addr, Calculation_Type.this);
+        }
+        // Return eq fs instance if already created
+        FeatureStructure cached = Calculation_Type.this.jcas.getJfsFromCaddr(addr);
+        if (null != cached) {
+          return cached;
+        }
+        FeatureStructure created = new Calculation(addr, Calculation_Type.this);
+        Calculation_Type.this.jcas.putJfsFromCaddr(addr, created);
+        return created;
+      }
+    };
+
+  /** @generated */
+  final Feature casFeat_calculationName;
+  /** @generated */
+  final int casFeatCode_calculationName;
+  /** @generated */
+  final Feature casFeat_calculationValue;
+  /** @generated */
+  final int casFeatCode_calculationValue;
+
+  /** initialize variables to correspond with Cas Type and Features
+   * @generated */
+  public Calculation_Type(JCas jcas, Type casType) {
+    super(jcas, casType);
+    casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());
+
+    casFeat_calculationName = jcas.getRequiredFeatureDE(casType, "calculationName", "uima.cas.String", featOkTst);
+    casFeatCode_calculationName = codeOf(casFeat_calculationName);
+
+    casFeat_calculationValue = jcas.getRequiredFeatureDE(casType, "calculationValue", "uima.cas.String", featOkTst);
+    casFeatCode_calculationValue = codeOf(casFeat_calculationValue);
+  }
+
+  /** Returns the feature's code, or JCas.INVALID_FEATURE_CODE when the feature is null. */
+  private static int codeOf(Feature feature) {
+    if (null == feature) {
+      return JCas.INVALID_FEATURE_CODE;
+    }
+    return ((FeatureImpl) feature).getCode();
+  }
+
+  /** @generated */
+  protected FSGenerator getFSGenerator() {
+    return fsGenerator;
+  }
+
+  /** @generated */
+  public String getCalculationName(int addr) {
+    if (featOkTst && casFeat_calculationName == null) {
+      jcas.throwFeatMissing("calculationName", TYPE_NAME);
+    }
+    return ll_cas.ll_getStringValue(addr, casFeatCode_calculationName);
+  }
+
+  /** @generated */
+  public void setCalculationName(int addr, String v) {
+    if (featOkTst && casFeat_calculationName == null) {
+      jcas.throwFeatMissing("calculationName", TYPE_NAME);
+    }
+    ll_cas.ll_setStringValue(addr, casFeatCode_calculationName, v);
+  }
+
+  /** @generated */
+  public String getCalculationValue(int addr) {
+    if (featOkTst && casFeat_calculationValue == null) {
+      jcas.throwFeatMissing("calculationValue", TYPE_NAME);
+    }
+    return ll_cas.ll_getStringValue(addr, casFeatCode_calculationValue);
+  }
+
+  /** @generated */
+  public void setCalculationValue(int addr, String v) {
+    if (featOkTst && casFeat_calculationValue == null) {
+      jcas.throwFeatMissing("calculationValue", TYPE_NAME);
+    }
+    ll_cas.ll_setStringValue(addr, casFeatCode_calculationValue, v);
+  }
+}
+
+
+
+
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/Calculation_Type.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,132 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/* First created by JCasGen Tue Feb 14 15:00:25 EST 2012 */
+package org.spin.scrubber.uima.type;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JCasRegistry;
+import org.apache.uima.jcas.cas.TOP_Type;
+
+import org.apache.uima.jcas.tcas.Annotation;
+
+
+/**
+ * Updated by JCasGen Wed Feb 15 09:56:18 EST 2012
+ * XML source: C:/dev/scrubber-pipeline/desc/type/KnownPHITypeSystem.xml
+ * @generated */
+public class KnownPHI extends Annotation {
+ /** @generated
+ * @ordered
+ */
+ public final static int typeIndexID = JCasRegistry.register(KnownPHI.class);
+ /** @generated
+ * @ordered
+ */
+ public final static int type = typeIndexID;
+ /** @generated */
+ public int getTypeIndexID() {return typeIndexID;}
+
+ /** Never called. Disable default constructor
+ * @generated */
+ protected KnownPHI() {}
+
+ /** Internal - constructor used by generator
+ * @generated */
+ public KnownPHI(int addr, TOP_Type type) {
+ super(addr, type);
+ readObject();
+ }
+
+ /** @generated */
+ public KnownPHI(JCas jcas) {
+ super(jcas);
+ readObject();
+ }
+
+ /** @generated */
+ public KnownPHI(JCas jcas, int begin, int end) {
+ super(jcas);
+ setBegin(begin);
+ setEnd(end);
+ readObject();
+ }
+
+ /** <!-- begin-user-doc -->
+ * Write your own initialization here
+ * <!-- end-user-doc -->
+ @generated modifiable */
+ private void readObject() {}
+
+
+
+ //*--------------*
+ //* Feature: code
+
+ /** getter for code - gets
+ * @generated */
+ public String getCode() {
+ if (KnownPHI_Type.featOkTst && ((KnownPHI_Type)jcasType).casFeat_code == null)
+ jcasType.jcas.throwFeatMissing("code", "org.spin.scrubber.uima.type.KnownPHI");
+ return jcasType.ll_cas.ll_getStringValue(addr, ((KnownPHI_Type)jcasType).casFeatCode_code);}
+
+ /** setter for code - sets
+ * @generated */
+ public void setCode(String v) {
+ if (KnownPHI_Type.featOkTst && ((KnownPHI_Type)jcasType).casFeat_code == null)
+ jcasType.jcas.throwFeatMissing("code", "org.spin.scrubber.uima.type.KnownPHI");
+ jcasType.ll_cas.ll_setStringValue(addr, ((KnownPHI_Type)jcasType).casFeatCode_code, v);}
+
+
+ //*--------------*
+ //* Feature: ontology
+
+ /** getter for ontology - gets
+ * @generated */
+ public String getOntology() {
+ if (KnownPHI_Type.featOkTst && ((KnownPHI_Type)jcasType).casFeat_ontology == null)
+ jcasType.jcas.throwFeatMissing("ontology", "org.spin.scrubber.uima.type.KnownPHI");
+ return jcasType.ll_cas.ll_getStringValue(addr, ((KnownPHI_Type)jcasType).casFeatCode_ontology);}
+
+ /** setter for ontology - sets
+ * @generated */
+ public void setOntology(String v) {
+ if (KnownPHI_Type.featOkTst && ((KnownPHI_Type)jcasType).casFeat_ontology == null)
+ jcasType.jcas.throwFeatMissing("ontology", "org.spin.scrubber.uima.type.KnownPHI");
+ jcasType.ll_cas.ll_setStringValue(addr, ((KnownPHI_Type)jcasType).casFeatCode_ontology, v);}
+
+
+ //*--------------*
+ //* Feature: content
+
+ /** getter for content - gets
+ * @generated */
+ public String getContent() {
+ if (KnownPHI_Type.featOkTst && ((KnownPHI_Type)jcasType).casFeat_content == null)
+ jcasType.jcas.throwFeatMissing("content", "org.spin.scrubber.uima.type.KnownPHI");
+ return jcasType.ll_cas.ll_getStringValue(addr, ((KnownPHI_Type)jcasType).casFeatCode_content);}
+
+ /** setter for content - sets
+ * @generated */
+ public void setContent(String v) {
+ if (KnownPHI_Type.featOkTst && ((KnownPHI_Type)jcasType).casFeat_content == null)
+ jcasType.jcas.throwFeatMissing("content", "org.spin.scrubber.uima.type.KnownPHI");
+ jcasType.ll_cas.ll_setStringValue(addr, ((KnownPHI_Type)jcasType).casFeatCode_content, v);}
+ }
+
+
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI_Type.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI_Type.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI_Type.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI_Type.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,140 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/* First created by JCasGen Tue Feb 14 15:00:25 EST 2012 */
+package org.spin.scrubber.uima.type;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JCasRegistry;
+import org.apache.uima.cas.impl.CASImpl;
+import org.apache.uima.cas.impl.FSGenerator;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.impl.TypeImpl;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.impl.FeatureImpl;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.jcas.tcas.Annotation_Type;
+
+/**
+ * Updated by JCasGen Wed Feb 15 09:56:18 EST 2012
+ * @generated */
+public class KnownPHI_Type extends Annotation_Type {
+  /** Type name handed to the feature-check lookup and to throwFeatMissing. */
+  private static final String TYPE_NAME = "org.spin.scrubber.uima.type.KnownPHI";
+
+  /** @generated */
+  public final static int typeIndexID = KnownPHI.typeIndexID;
+
+  /** @generated
+      @modifiable */
+  public final static boolean featOkTst = JCasRegistry.getFeatOkTst(TYPE_NAME);
+
+  /**
+   * Creates KnownPHI instances for CAS addresses. When useExistingInstance is set, the
+   * instance stored in the JCas for that address is returned, and a new one is stored
+   * there first if none exists yet.
+   * @generated */
+  private final FSGenerator fsGenerator = new FSGenerator() {
+    public FeatureStructure createFS(int addr, CASImpl cas) {
+      final KnownPHI_Type owner = KnownPHI_Type.this;
+      if (!owner.useExistingInstance) {
+        return new KnownPHI(addr, owner);
+      }
+      // Return eq fs instance if already created
+      FeatureStructure existing = owner.jcas.getJfsFromCaddr(addr);
+      if (existing == null) {
+        existing = new KnownPHI(addr, owner);
+        owner.jcas.putJfsFromCaddr(addr, existing);
+      }
+      return existing;
+    }
+  };
+
+  /** @generated */
+  protected FSGenerator getFSGenerator() {
+    return fsGenerator;
+  }
+
+  /** @generated */
+  final Feature casFeat_code;
+  /** @generated */
+  final int casFeatCode_code;
+
+  /** @generated */
+  final Feature casFeat_ontology;
+  /** @generated */
+  final int casFeatCode_ontology;
+
+  /** @generated */
+  final Feature casFeat_content;
+  /** @generated */
+  final int casFeatCode_content;
+
+  /** Calls throwFeatMissing when feature checking is on and the given handle is null. */
+  private void checkFeaturePresent(Feature feature, String featureName) {
+    if (featOkTst && feature == null) {
+      jcas.throwFeatMissing(featureName, TYPE_NAME);
+    }
+  }
+
+  /** Maps a feature handle to its numeric code, or INVALID_FEATURE_CODE for a null handle. */
+  private static int featureCodeOf(Feature feature) {
+    if (feature == null) {
+      return JCas.INVALID_FEATURE_CODE;
+    }
+    return ((FeatureImpl) feature).getCode();
+  }
+
+  /** Reads the "code" string feature at the given address.
+   * @generated */
+  public String getCode(int addr) {
+    checkFeaturePresent(casFeat_code, "code");
+    return ll_cas.ll_getStringValue(addr, casFeatCode_code);
+  }
+
+  /** Writes the "code" string feature at the given address.
+   * @generated */
+  public void setCode(int addr, String v) {
+    checkFeaturePresent(casFeat_code, "code");
+    ll_cas.ll_setStringValue(addr, casFeatCode_code, v);
+  }
+
+  /** Reads the "ontology" string feature at the given address.
+   * @generated */
+  public String getOntology(int addr) {
+    checkFeaturePresent(casFeat_ontology, "ontology");
+    return ll_cas.ll_getStringValue(addr, casFeatCode_ontology);
+  }
+
+  /** Writes the "ontology" string feature at the given address.
+   * @generated */
+  public void setOntology(int addr, String v) {
+    checkFeaturePresent(casFeat_ontology, "ontology");
+    ll_cas.ll_setStringValue(addr, casFeatCode_ontology, v);
+  }
+
+  /** Reads the "content" string feature at the given address.
+   * @generated */
+  public String getContent(int addr) {
+    checkFeaturePresent(casFeat_content, "content");
+    return ll_cas.ll_getStringValue(addr, casFeatCode_content);
+  }
+
+  /** Writes the "content" string feature at the given address.
+   * @generated */
+  public void setContent(int addr, String v) {
+    checkFeaturePresent(casFeat_content, "content");
+    ll_cas.ll_setStringValue(addr, casFeatCode_content, v);
+  }
+
+  /** initialize variables to correspond with Cas Type and Features:
+   * registers the generator for this type, then looks up the handle and code of
+   * the "code", "ontology" and "content" features.
+   * @generated */
+  public KnownPHI_Type(JCas jcas, Type casType) {
+    super(jcas, casType);
+    casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator());
+
+    casFeat_code = jcas.getRequiredFeatureDE(casType, "code", "uima.cas.String", featOkTst);
+    casFeatCode_code = featureCodeOf(casFeat_code);
+
+    casFeat_ontology = jcas.getRequiredFeatureDE(casType, "ontology", "uima.cas.String", featOkTst);
+    casFeatCode_ontology = featureCodeOf(casFeat_ontology);
+
+    casFeat_content = jcas.getRequiredFeatureDE(casType, "content", "uima.cas.String", featOkTst);
+    casFeatCode_content = featureCodeOf(casFeat_content);
+  }
+}
+
+
+
+
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/KnownPHI_Type.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,114 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/* First created by JCasGen Tue Aug 09 10:36:24 EDT 2011 */
+package org.spin.scrubber.uima.type;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JCasRegistry;
+import org.apache.uima.jcas.cas.TOP_Type;
+
+import org.apache.uima.jcas.tcas.Annotation;
+
+
+/**
+ * Updated by JCasGen Tue Aug 09 14:30:26 EDT 2011
+ * XML source: C:/dev/scrubber-pipeline_tmp/desc/consumer/consumer_printer_debug.xml
+ * @generated */
+public class OntologyMatch extends Annotation {
+  /** Index returned by JCasRegistry.register for this cover class.
+   * @generated
+   * @ordered
+   */
+  public final static int typeIndexID = JCasRegistry.register(OntologyMatch.class);
+  /** Same value as typeIndexID.
+   * @generated
+   * @ordered
+   */
+  public final static int type = typeIndexID;
+  /** @generated */
+  public int getTypeIndexID() {return typeIndexID;}
+
+  /** Never called. Disable default constructor
+   * @generated */
+  protected OntologyMatch() {}
+
+  /** Internal - constructor used by generator
+   * @generated */
+  public OntologyMatch(int addr, TOP_Type type) {
+    super(addr, type);
+    readObject();
+  }
+
+  /** Creates an annotation in the given JCas without setting begin/end.
+   * @generated */
+  public OntologyMatch(JCas jcas) {
+    super(jcas);
+    readObject();
+  }
+
+  /** Creates an annotation in the given JCas and sets its begin and end offsets.
+   * @generated */
+  public OntologyMatch(JCas jcas, int begin, int end) {
+    super(jcas);
+    setBegin(begin);
+    setEnd(end);
+    readObject();
+  }
+
+  /** <!-- begin-user-doc -->
+   * Write your own initialization here
+   * <!-- end-user-doc -->
+   * @generated modifiable */
+  private void readObject() {}
+
+
+
+  //*--------------*
+  //* Feature: code
+  // The type name passed to throwFeatMissing below is this class's fully qualified
+  // name (package org.spin.scrubber.uima.type); it previously omitted ".uima".
+
+  /** getter for code - gets the "code" string feature of this annotation
+   * @generated */
+  public String getCode() {
+    if (OntologyMatch_Type.featOkTst && ((OntologyMatch_Type)jcasType).casFeat_code == null)
+      jcasType.jcas.throwFeatMissing("code", "org.spin.scrubber.uima.type.OntologyMatch");
+    return jcasType.ll_cas.ll_getStringValue(addr, ((OntologyMatch_Type)jcasType).casFeatCode_code);}
+
+  /** setter for code - sets the "code" string feature of this annotation
+   * @generated */
+  public void setCode(String v) {
+    if (OntologyMatch_Type.featOkTst && ((OntologyMatch_Type)jcasType).casFeat_code == null)
+      jcasType.jcas.throwFeatMissing("code", "org.spin.scrubber.uima.type.OntologyMatch");
+    jcasType.ll_cas.ll_setStringValue(addr, ((OntologyMatch_Type)jcasType).casFeatCode_code, v);}
+
+
+  //*--------------*
+  //* Feature: ontology
+
+  /** getter for ontology - gets the "ontology" string feature of this annotation
+   * @generated */
+  public String getOntology() {
+    if (OntologyMatch_Type.featOkTst && ((OntologyMatch_Type)jcasType).casFeat_ontology == null)
+      jcasType.jcas.throwFeatMissing("ontology", "org.spin.scrubber.uima.type.OntologyMatch");
+    return jcasType.ll_cas.ll_getStringValue(addr, ((OntologyMatch_Type)jcasType).casFeatCode_ontology);}
+
+  /** setter for ontology - sets the "ontology" string feature of this annotation
+   * @generated */
+  public void setOntology(String v) {
+    if (OntologyMatch_Type.featOkTst && ((OntologyMatch_Type)jcasType).casFeat_ontology == null)
+      jcasType.jcas.throwFeatMissing("ontology", "org.spin.scrubber.uima.type.OntologyMatch");
+    jcasType.ll_cas.ll_setStringValue(addr, ((OntologyMatch_Type)jcasType).casFeatCode_ontology, v);}
+  }
+
+
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch_Type.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch_Type.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch_Type.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch_Type.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,118 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+/* First created by JCasGen Tue Aug 09 10:36:24 EDT 2011 */
+package org.spin.scrubber.uima.type;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JCasRegistry;
+import org.apache.uima.cas.impl.CASImpl;
+import org.apache.uima.cas.impl.FSGenerator;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.impl.TypeImpl;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.impl.FeatureImpl;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.jcas.tcas.Annotation_Type;
+
+/**
+ * Updated by JCasGen Tue Aug 09 14:30:27 EDT 2011
+ * @generated */
+public class OntologyMatch_Type extends Annotation_Type {
+  /** @generated */
+  protected FSGenerator getFSGenerator() {return fsGenerator;}
+  /**
+   * Creates OntologyMatch instances for CAS addresses. When useExistingInstance is set,
+   * the instance stored in the JCas for that address is reused, and a new one is stored
+   * there first if none exists yet.
+   * @generated */
+  private final FSGenerator fsGenerator =
+    new FSGenerator() {
+      public FeatureStructure createFS(int addr, CASImpl cas) {
+        if (OntologyMatch_Type.this.useExistingInstance) {
+          // Return eq fs instance if already created
+          FeatureStructure fs = OntologyMatch_Type.this.jcas.getJfsFromCaddr(addr);
+          if (null == fs) {
+            fs = new OntologyMatch(addr, OntologyMatch_Type.this);
+            OntologyMatch_Type.this.jcas.putJfsFromCaddr(addr, fs);
+            return fs;
+          }
+          return fs;
+        } else return new OntologyMatch(addr, OntologyMatch_Type.this);
+      }
+    };
+  /** @generated */
+  public final static int typeIndexID = OntologyMatch.typeIndexID;
+  // The type name used here and in every throwFeatMissing call below is the fully
+  // qualified name of the OntologyMatch cover class (package org.spin.scrubber.uima.type);
+  // it previously omitted ".uima".
+  /** @generated
+      @modifiable */
+  public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.spin.scrubber.uima.type.OntologyMatch");
+
+  /** @generated */
+  final Feature casFeat_code;
+  /** @generated */
+  final int casFeatCode_code;
+  /** Reads the "code" string feature at the given address.
+   * @generated */
+  public String getCode(int addr) {
+    if (featOkTst && casFeat_code == null)
+      jcas.throwFeatMissing("code", "org.spin.scrubber.uima.type.OntologyMatch");
+    return ll_cas.ll_getStringValue(addr, casFeatCode_code);
+  }
+  /** Writes the "code" string feature at the given address.
+   * @generated */
+  public void setCode(int addr, String v) {
+    if (featOkTst && casFeat_code == null)
+      jcas.throwFeatMissing("code", "org.spin.scrubber.uima.type.OntologyMatch");
+    ll_cas.ll_setStringValue(addr, casFeatCode_code, v);}
+
+
+
+  /** @generated */
+  final Feature casFeat_ontology;
+  /** @generated */
+  final int casFeatCode_ontology;
+  /** Reads the "ontology" string feature at the given address.
+   * @generated */
+  public String getOntology(int addr) {
+    if (featOkTst && casFeat_ontology == null)
+      jcas.throwFeatMissing("ontology", "org.spin.scrubber.uima.type.OntologyMatch");
+    return ll_cas.ll_getStringValue(addr, casFeatCode_ontology);
+  }
+  /** Writes the "ontology" string feature at the given address.
+   * @generated */
+  public void setOntology(int addr, String v) {
+    if (featOkTst && casFeat_ontology == null)
+      jcas.throwFeatMissing("ontology", "org.spin.scrubber.uima.type.OntologyMatch");
+    ll_cas.ll_setStringValue(addr, casFeatCode_ontology, v);}
+
+
+
+
+
+  /** initialize variables to correspond with Cas Type and Features:
+   * registers the generator for this type, then looks up the handle and code of
+   * the "code" and "ontology" features.
+   * @generated */
+  public OntologyMatch_Type(JCas jcas, Type casType) {
+    super(jcas, casType);
+    casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());
+
+
+    casFeat_code = jcas.getRequiredFeatureDE(casType, "code", "uima.cas.String", featOkTst);
+    casFeatCode_code = (null == casFeat_code) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_code).getCode();
+
+
+    casFeat_ontology = jcas.getRequiredFeatureDE(casType, "ontology", "uima.cas.String", featOkTst);
+    casFeatCode_ontology = (null == casFeat_ontology) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_ontology).getCode();
+
+  }
+}
+
+
+
+
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/uima/type/OntologyMatch_Type.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/resources/log4j.properties
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/resources/log4j.properties?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/main/resources/log4j.properties (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/main/resources/log4j.properties Sun Jul 7 19:23:05 2013
@@ -0,0 +1,14 @@
+# DEFAULT ROOT CONFIGURATION
+# Root logger: level INFO, attached to the console appender named "R".
+log4j.rootLogger=INFO, R
+log4j.appender.R=org.apache.log4j.ConsoleAppender
+log4j.appender.R.layout=org.apache.log4j.PatternLayout
+# Line format: [timestamp][level] [Root] [caller's simple class name] [message], then a newline.
+log4j.appender.R.layout.ConversionPattern=[%d{yyyy-MMM-dd-HH:mm:ss.SSS}][%p] [Root] [%C{1}] [%m] %n
+
+# ETL Runtime : logging to file
+# Logger for the org.spin.scrubber package: level INFO, attached to the file appender "scrubber".
+log4j.logger.org.spin.scrubber=INFO, scrubber
+log4j.appender.scrubber=org.apache.log4j.FileAppender
+# The appender threshold (DEBUG) is looser than the logger level above (INFO),
+# so the logger level is the one that filters here.
+log4j.appender.scrubber.Threshold=DEBUG
+# Relative path: the file is created relative to the working directory of the process.
+log4j.appender.scrubber.File=scrubber-runtime.log
+# Append=true keeps existing file content instead of truncating it.
+log4j.appender.scrubber.Append=true
+log4j.appender.scrubber.layout=org.apache.log4j.PatternLayout
+# Same format as the console appender, tagged [Scrubber] instead of [Root].
+log4j.appender.scrubber.layout.ConversionPattern=[%d{yyyy-MMM-dd-HH:mm:ss.SSS}][%p] [Scrubber] [%C{1}] [%m] %n
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/resources/log4j.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/ScrubberPropertiesTest.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/ScrubberPropertiesTest.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/ScrubberPropertiesTest.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/ScrubberPropertiesTest.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,64 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+package org.spin.scrubber;
+
+import junit.framework.TestCase;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+
+/**
+ * @author Andrew McMurry, MS
+ * <p/>
+ * With primary support from Children's Hospital Informatics Program @
+ * Harvard-MIT Health Sciences and Technology and
+ * <p/>
+ * Secondary support from the Harvard Medical School
+ * Center for BioMedical Informatics
+ * <p/>
+ * PHD candidate, Boston University Bioinformatics
+ * Member, I2b2 National Center for Biomedical Computing
+ * <p/>
+ * All works licensed under LGPL
+ * <p/>
+ * User: andy
+ * Date: 6/19/12
+ * Time: 6:08 PM
+ */
+public class ScrubberPropertiesTest extends TestCase
+{
+  // The logger is never reassigned, so it is declared final.
+  private static final Logger log = Logger.getLogger(ScrubberPropertiesTest.class);
+
+  /**
+   * Logs and prints the string and shell-script renderings of ScrubberProperties,
+   * calls writeUnixShellScript(), and finally asserts that validate() returns true.
+   *
+   * @throws IOException propagated from the ScrubberProperties calls
+   */
+  public void test() throws IOException
+  {
+    log.info(ScrubberProperties.asString());
+
+    System.out.println(ScrubberProperties.asUnixShellScript());
+    System.out.println(ScrubberProperties.asWindowsShellScript());
+
+    ScrubberProperties.writeUnixShellScript();
+
+    // This is the only assertion in the test; the calls above only have to complete without throwing.
+    assertTrue("scrubber.properties did not validate " +
+               "This is most likely because your scrubber database tables are not setup.",
+               ScrubberProperties.validate());
+  }
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/ScrubberPropertiesTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/TemplateFileProcessorTest.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/TemplateFileProcessorTest.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/TemplateFileProcessorTest.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/TemplateFileProcessorTest.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,51 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+package org.spin.scrubber;
+
+import junit.framework.TestCase;
+import org.apache.log4j.Logger;
+import org.spin.scrubber.templates.TemplateFileProcessor;
+
+/**
+ * @author Andrew McMurry, MS
+ * <p/>
+ * With primary support from Children's Hospital Informatics Program @
+ * Harvard-MIT Health Sciences and Technology and
+ * <p/>
+ * Secondary support from the Harvard Medical School
+ * Center for BioMedical Informatics
+ * <p/>
+ * PHD candidate, Boston University Bioinformatics
+ * Member, I2b2 National Center for Biomedical Computing
+ * <p/>
+ * All works licensed under LGPL
+ * <p/>
+ * User: andy
+ * Date: 6/20/12
+ * Time: 11:56 AM
+ */
+public class TemplateFileProcessorTest extends TestCase
+{
+  // The logger is never reassigned, so it is declared final.
+  private static final Logger log = Logger.getLogger(TemplateFileProcessorTest.class);
+
+  /**
+   * Smoke test: calls TemplateFileProcessor.processTemplatesAllKnown() and passes
+   * as long as the call does not throw. There are no assertions on its output.
+   *
+   * @throws Exception propagated from processTemplatesAllKnown()
+   */
+  public void test() throws Exception
+  {
+    TemplateFileProcessor.processTemplatesAllKnown();
+  }
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/TemplateFileProcessorTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtegeTest.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtegeTest.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtegeTest.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtegeTest.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,63 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+package org.spin.scrubber.classification;
+
+import junit.framework.TestCase;
+import org.apache.log4j.Logger;
+import org.spin.scrubber.ScrubberProperties;
+import org.spin.scrubber.classification.HumanAnnotationsExtractorProtege;
+
+import java.io.IOException;
+
+/**
+ * @author Andrew McMurry, MS
+ * <p/>
+ * With primary support from Children's Hospital Informatics Program @
+ * Harvard-MIT Health Sciences and Technology and
+ * <p/>
+ * Secondary support from the Harvard Medical School
+ * Center for BioMedical Informatics
+ * <p/>
+ * PHD candidate, Boston University Bioinformatics
+ * Member, I2b2 National Center for Biomedical Computing
+ * <p/>
+ * All works licensed under LGPL
+ * <p/>
+ * User: andy
+ * Date: 6/19/12
+ * Time: 7:30 PM
+ */
+public class HumanAnnotationsExtractorProtegeTest extends TestCase
+{
+  private static Logger log = Logger.getLogger(HumanAnnotationsExtractorProtegeTest.class);
+
+  /**
+   * Runs parseHumanAnnotations() on the test annotations directory with the "_test"
+   * argument, but only when ScrubberProperties reports the test database as available.
+   * Otherwise a warning is logged and the test returns without doing anything.
+   *
+   * @throws IOException declared for the extractor calls
+   */
+  public void test() throws IOException
+  {
+    // Guard: without a test database there is nothing to run.
+    if (!ScrubberProperties.isDBAvailableForTesting())
+    {
+      log.warn("Testing Database not available, skipping test assertions.");
+      return;
+    }
+
+    new HumanAnnotationsExtractorProtege(ScrubberProperties.getDirInputHumanAnnotationsTest(), "_test")
+        .parseHumanAnnotations();
+  }
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtegeTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/org/spin/scrubber/uima/consumer/CSVAnnotationTest.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/org/spin/scrubber/uima/consumer/CSVAnnotationTest.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/org/spin/scrubber/uima/consumer/CSVAnnotationTest.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/org/spin/scrubber/uima/consumer/CSVAnnotationTest.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,41 @@
+package org.spin.scrubber.org.spin.scrubber.uima.consumer;
+
+import junit.framework.TestCase;
+import org.spin.scrubber.uima.consumer.CSVAnnotation;
+
+import java.io.IOException;
+
+import static org.spin.scrubber.uima.consumer.CSVAnnotation._Sentence;
+import static org.spin.scrubber.uima.consumer.CSVAnnotation.getCSV;
+import static org.spin.scrubber.uima.consumer.CSVAnnotation.log;
+
+/**
+ * @author Andrew McMurry, MS
+ * Created: 4/4/13
+ */
+public class CSVAnnotationTest extends TestCase
+{
+  /**
+   * Logs the _Sentence headers, writes two lines through _Sentence, then closes and
+   * deletes it. close() and delete() run in a finally block so they are also reached
+   * when one of the writeLine calls throws.
+   *
+   * @throws IOException propagated from the _Sentence calls
+   */
+  public void testSentenceWriter() throws IOException
+  {
+    log.debug("headers = "+ _Sentence.getHeaders());
+
+    try
+    {
+      _Sentence.writeLine("1.txt", 1, 10);
+      _Sentence.writeLine("1.txt", 11, 20);
+    }
+    finally
+    {
+      // Same order as before: close first, then delete.
+      _Sentence.close();
+      _Sentence.delete();
+    }
+  }
+
+  /** Checks that getCSV over asStrings(1, 2, 3) renders as "1,2,3". */
+  public void testCSV()
+  {
+    assertEquals("1,2,3",
+        getCSV(CSVAnnotation.asStrings(1, 2, 3)).toString());
+  }
+
+
+  // Sample input whose last field is a single backslash character.
+  static String exampleTextWithEscapeChar = "PunctuationToken,'1.txt',1,2,3,ADJ,\\";
+
+  /**
+   * Prints the result of getCoveredText for the backslash sample; there is no assertion,
+   * so this only verifies that the call completes without throwing.
+   */
+  public void testSpecialChars()
+  {
+    System.out.println(CSVAnnotation.getCoveredText(exampleTextWithEscapeChar));
+  }
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/org/spin/scrubber/uima/consumer/CSVAnnotationTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/uima/annotator/DictionaryAnnotatorTest.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/uima/annotator/DictionaryAnnotatorTest.java?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/uima/annotator/DictionaryAnnotatorTest.java (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/uima/annotator/DictionaryAnnotatorTest.java Sun Jul 7 19:23:05 2013
@@ -0,0 +1,44 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+package org.spin.scrubber.uima.annotator;
+
+import java.util.Set;
+
+import junit.framework.TestCase;
+
+public class DictionaryAnnotatorTest extends TestCase
+{
+  /**
+   * Checks how many permutations generatePermutations returns for three sample phrases.
+   * The expected count is passed first, as assertEquals(expected, actual) requires, so a
+   * failure message reports the two values the right way round.
+   */
+  public void testPermutations()
+  {
+    DictionaryAnnotator a = new DictionaryAnnotator();
+    Set<String> perms = a.generatePermutations("a chest x-ray");
+    assertEquals(3, perms.size());
+
+    perms = a.generatePermutations("intravenous lasix");
+    assertEquals(1, perms.size());
+
+    perms = a.generatePermutations("non insulin dependent diabetes mellitus");
+    assertEquals(7, perms.size());
+  }
+}
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/test/java/org/spin/scrubber/uima/annotator/DictionaryAnnotatorTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/log4j.properties?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/log4j.properties (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/log4j.properties Sun Jul 7 19:23:05 2013
@@ -0,0 +1,15 @@
+# DEFAULT ROOT CONFIGURATION
+# Root logger for tests: level debug, attached to the console appender named "R".
+log4j.rootLogger=debug, R
+log4j.appender.R=org.apache.log4j.ConsoleAppender
+log4j.appender.R.layout=org.apache.log4j.PatternLayout
+# Line format: [timestamp][level] [Root] [caller's simple class name] [message], then a newline.
+log4j.appender.R.layout.ConversionPattern=[%d{yyyy-MMM-dd-HH:mm:ss.SSS}][%p] [Root] [%C{1}] [%m] %n
+
+# ETL Runtime : logging to file
+# The logger name follows the package of the scrubber classes (org.spin.scrubber), as in
+# the main log4j.properties. It was previously org.spin.loader.scrubber, which matches none
+# of the packages visible in this change, so the file appender received no events.
+log4j.logger.org.spin.scrubber=debug, scrubber
+log4j.appender.scrubber=org.apache.log4j.FileAppender
+log4j.appender.scrubber.Threshold=DEBUG
+# Relative path: the file is created relative to the working directory of the process.
+log4j.appender.scrubber.File=scrubber-runtime.log
+log4j.appender.scrubber.Append=true
+log4j.appender.scrubber.layout=org.apache.log4j.PatternLayout
+log4j.appender.scrubber.layout.ConversionPattern=[%d{yyyy-MMM-dd-HH:mm:ss.SSS}][%p] [Scrubber] [%C{1}] [%m] %n
+
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/log4j.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/scrubber.properties
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/scrubber.properties?rev=1500511&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/scrubber.properties (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/scrubber.properties Sun Jul 7 19:23:05 2013
@@ -0,0 +1,50 @@
+################ DATABASE ########################################
+MYSQL_ADMIN_USER=root
+MYSQL_ADMIN_PWD=
+
+DB_DRIVER=com.mysql.jdbc.Driver
+DB_NAME=scrubber
+DB_USER=scrubber
+DB_PWD=scrubber
+DB_URI=jdbc:mysql://localhost:3306/scrubber
+DIR_DB_TEMP=/tmp
+
+#### HUMAN ANNOTATED FILES
+DIR_INPUT_HUMAN_ANNOTATIONS_TRAIN=data/input/phi/train
+DIR_INPUT_HUMAN_ANNOTATIONS_TEST=data/input/phi/test
+HUMAN_ANNOTATIONS_IMPLEMENTATION=org.spin.scrubber.classification.HumanAnnotationsExtractorProtege
+
+#### MEDICAL JOURNAL PUBLICATIONS
+DIR_INPUT_PUBS_XML=data/input/pubs/xml
+DIR_INPUT_PUBS_TXT=data/input/pubs/txt
+DIR_INPUT_PUBS_PROCESSED=data/input/pubs/processed
+
+#### CASES (input and scrubbed-output directories)
+DIR_INPUT_PUBS=data/input/pubs
+DIR_INPUT_TRAIN=data/input/cases/train
+DIR_INPUT_TEST=data/input/cases/test
+#DIR_OUTPUT_TRAIN=data/scrubbed/train
+DIR_OUTPUT_TEST=data/scrubbed/test
+
+### CLASSIFICATION
+DIR_MODELS=data/models
+FILE_MODEL_TRAIN=train.arff
+FILE_MODEL_TEST=test.arff
+
+### UIMA
+UIMA_READER_FILE_TRAIN=reader_files_train.xml
+UIMA_READER_FILE_TEST=reader_files_test.xml
+UIMA_READER_FILE_PUBS=reader_files_pubs.xml
+UIMA_READER_IMPL_TRAIN=org.spin.scrubber.uima.reader.FileSystemCollectionReaderXML
+UIMA_READER_IMPL_TEST=org.spin.scrubber.uima.reader.FileSystemCollectionReaderXML
+UIMA_READER_IMPL_PUBS=org.spin.scrubber.uima.reader.FileSystemCollectionReader
+
+CLASSIFICATION_COST_MATRIX="[0.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0; 100.0 0.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0; 100.0 1.0 0.0 1.0 1.0 1.0 1.0 1.0 1.0; 100.0 1.0 1.0 0.0 1.0 1.0 1.0 1.0 1.0; 100.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0 1.0; 100.0 1.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0; 100.0 1.0 1.0 1.0 1.0 1.0 0.0 1.0 1.0; 100.0 1.0 1.0 1.0 1.0 1.0 1.0 0.0 1.0; 100.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 0.0]"
+CONSUMER_ANNOT_PRINTER_OUTFILE=out/annotations.txt
+
+# Localhost settings
+LOCALHOST_NUM_THREADS=2
+EXPORT=export
+
+## For testing purposes
+TEST_DB_AVAILABLE=false
Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/test/resources/scrubber.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain