You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by br...@apache.org on 2013/07/07 21:24:12 UTC
svn commit: r1500512 - in /ctakes/sandbox/ctakes-scrubber-deid/desc/reader:
TEMPLATE.reader_files_pubs.xml TEMPLATE.reader_files_test.xml
TEMPLATE.reader_files_train.xml
Author: brittfitch
Date: Sun Jul 7 19:24:12 2013
New Revision: 1500512
URL: http://svn.apache.org/r1500512
Log:
add uima reader template files
Added:
ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml (with props)
ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml (with props)
ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml (with props)
Added: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml?rev=1500512&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml Sun Jul 7 19:24:12 2013
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+
+ <!-- TODO: use scrubber.properties -->
+ <implementationName>$UIMA_READER_IMPL_PUBS</implementationName>
+
+ <processingResourceMetaData>
+ <name>File System Collection Reader</name>
+ <description>Reads files from the filesystem. This CollectionReader may be used
+ with or without a CAS Initializer. If a CAS Initializer is supplied, it will
+ be passed an InputStream to the file and must populate the CAS from that
+ InputStream. If no CAS Initializer is supplied, this CollectionReader will
+ read the file itself and treat the entire contents of the file as the
+ document to be inserted into the CAS.</description>
+ <version>1.0</version>
+ <vendor>The Apache Software Foundation</vendor>
+ <configurationParameters>
+ <configurationParameter>
+ <name>InputDirectory</name>
+ <description>Directory containing input files</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>Encoding</name>
+ <description>Character encoding for the documents. If not specified,
+ the default system encoding will be used. Note that this parameter
+ only applies if there is no CAS Initializer provided; otherwise,
+ it is the CAS Initializer's responsibility to deal with character
+ encoding issues.</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>Language</name>
+ <description>ISO language code for the documents</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>BrowseSubdirectories</name>
+ <description>True means include files of subdirectories, recursively, of the input directory.</description>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>InputDirectory</name>
+ <value>
+ <string>$DIR_INPUT_PUBS_PROCESSED</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>BrowseSubdirectories</name>
+ <value>
+ <boolean>false</boolean>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <typeSystemDescription>
+ <imports>
+ <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+ <!-- TODO: rename KnownPHI to Human Annotation -->
+ <import location="../type/KnownPHITypeSystem.xml"/>
+ </imports>
+ </typeSystemDescription>
+ <typePriorities/>
+ <fsIndexCollection>
+ <fsIndexes>
+ <fsIndexDescription>
+ <label>KnownPHIIndex</label>
+ <typeName>org.spin.scrubber.uima.type.KnownPHI</typeName>
+ <kind>sorted</kind>
+ <keys>
+ <fsIndexKey>
+ <featureName>begin</featureName>
+ <comparator>standard</comparator>
+ </fsIndexKey>
+ </keys>
+ </fsIndexDescription>
+ </fsIndexes>
+ </fsIndexCollection>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs>
+ <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+ <!-- TODO: rename KnownPHI to Human Annotation -->
+ <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.KnownPHI</type>
+ </outputs>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+ <outputsNewCASes>true</outputsNewCASes>
+ </operationalProperties>
+ </processingResourceMetaData>
+ <resourceManagerConfiguration/>
+</collectionReaderDescription>
Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml?rev=1500512&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml Sun Jul 7 19:24:12 2013
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <implementationName>$UIMA_READER_IMPL_TEST</implementationName>
+ <processingResourceMetaData>
+ <name>File System Collection Reader</name>
+ <description>Reads files from the filesystem. This CollectionReader may be used
+ with or without a CAS Initializer. If a CAS Initializer is supplied, it will
+ be passed an InputStream to the file and must populate the CAS from that
+ InputStream. If no CAS Initializer is supplied, this CollectionReader will
+ read the file itself and treat the entire contents of the file as the
+ document to be inserted into the CAS.</description>
+ <version>1.0</version>
+ <vendor>The Apache Software Foundation</vendor>
+ <configurationParameters>
+ <configurationParameter>
+ <name>KnownPHINodeList</name>
+ <description>List of XPaths to specific fields known to contain ONLY PHI.</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>ScrubNodeList</name>
+ <description>List of XPaths to scrub</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>InputDirectory</name>
+ <description>Directory containing input files</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>Encoding</name>
+ <description>Character encoding for the documents. If not specified,
+ the default system encoding will be used. Note that this parameter
+ only applies if there is no CAS Initializer provided; otherwise,
+ it is the CAS Initializer's responsibility to deal with character
+ encoding issues.</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>Language</name>
+ <description>ISO language code for the documents</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>BrowseSubdirectories</name>
+ <description>True means include files of subdirectories, recursively, of the input directory.</description>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>ScrubNodeList</name>
+ <value>
+ <array>
+ <string>/Envelope/Body/PathologyCase/FullReportData</string>
+ <string>/Envelope/Body/PathologyCase/FullReportText</string>
+ <string>/Envelope/Body/PathologyCase/GrossDescriptionText</string>
+ <string>/Envelope/Body/PathologyCase/DiagnosisText</string>
+ </array>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>KnownPHINodeList</name>
+ <value>
+ <array>
+ <string>/Envelope/Header/Identifiers/FirstName</string>
+ <string>/Envelope/Header/Identifiers/LastName</string>
+ <string>/Envelope/Header/Identifiers/DateOfBirth</string>
+ <string>/Envelope/Header/Identifiers/SSN</string>
+ <string>/Envelope/Header/Identifiers/AccessionNumber</string>
+ <string>/Envelope/Header/Identifiers/LocalMRN</string>
+ </array>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>InputDirectory</name>
+ <value>
+ <string>$DIR_INPUT_TEST</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>BrowseSubdirectories</name>
+ <value>
+ <boolean>false</boolean>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <typeSystemDescription>
+ <imports>
+ <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+ <import location="../type/KnownPHITypeSystem.xml"/>
+ </imports>
+ </typeSystemDescription>
+ <typePriorities/>
+ <fsIndexCollection>
+ <fsIndexes>
+ <fsIndexDescription>
+ <label>KnownPHIIndex</label>
+ <typeName>org.spin.scrubber.uima.type.KnownPHI</typeName>
+ <kind>sorted</kind>
+ <keys>
+ <fsIndexKey>
+ <featureName>begin</featureName>
+ <comparator>standard</comparator>
+ </fsIndexKey>
+ </keys>
+ </fsIndexDescription>
+ </fsIndexes>
+ </fsIndexCollection>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs>
+ <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+ <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.KnownPHI</type>
+ </outputs>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+ <outputsNewCASes>true</outputsNewCASes>
+ </operationalProperties>
+ </processingResourceMetaData>
+ <resourceManagerConfiguration/>
+</collectionReaderDescription>
Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml?rev=1500512&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml Sun Jul 7 19:24:12 2013
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <implementationName>$UIMA_READER_IMPL_TRAIN</implementationName>
+ <processingResourceMetaData>
+ <name>File System Collection Reader</name>
+ <description>Reads files from the filesystem. This CollectionReader may be used
+ with or without a CAS Initializer. If a CAS Initializer is supplied, it will
+ be passed an InputStream to the file and must populate the CAS from that
+ InputStream. If no CAS Initializer is supplied, this CollectionReader will
+ read the file itself and treat the entire contents of the file as the
+ document to be inserted into the CAS.</description>
+ <version>1.0</version>
+ <vendor>The Apache Software Foundation</vendor>
+ <configurationParameters>
+ <configurationParameter>
+ <name>KnownPHINodeList</name>
+ <description>List of XPaths to specific fields known to contain ONLY PHI.</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>ScrubNodeList</name>
+ <description>List of XPaths to scrub</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>InputDirectory</name>
+ <description>Directory containing input files</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>Encoding</name>
+ <description>Character encoding for the documents. If not specified,
+ the default system encoding will be used. Note that this parameter
+ only applies if there is no CAS Initializer provided; otherwise,
+ it is the CAS Initializer's responsibility to deal with character
+ encoding issues.</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>Language</name>
+ <description>ISO language code for the documents</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>BrowseSubdirectories</name>
+ <description>True means include files of subdirectories, recursively, of the input directory.</description>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>ScrubNodeList</name>
+ <value>
+ <array>
+ <string>/Envelope/Body/PathologyCase/FullReportData</string>
+ <string>/Envelope/Body/PathologyCase/FullReportText</string>
+ <string>/Envelope/Body/PathologyCase/GrossDescriptionText</string>
+ <string>/Envelope/Body/PathologyCase/DiagnosisText</string>
+ </array>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>KnownPHINodeList</name>
+ <value>
+ <array>
+ <string>/Envelope/Header/Identifiers/FirstName</string>
+ <string>/Envelope/Header/Identifiers/LastName</string>
+ <string>/Envelope/Header/Identifiers/DateOfBirth</string>
+ <string>/Envelope/Header/Identifiers/SSN</string>
+ <string>/Envelope/Header/Identifiers/AccessionNumber</string>
+ <string>/Envelope/Header/Identifiers/LocalMRN</string>
+ </array>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>InputDirectory</name>
+ <value>
+ <string>$DIR_INPUT_TRAIN</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>BrowseSubdirectories</name>
+ <value>
+ <boolean>false</boolean>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <typeSystemDescription>
+ <imports>
+ <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+ <import location="../type/KnownPHITypeSystem.xml"/>
+ </imports>
+ </typeSystemDescription>
+ <typePriorities/>
+ <fsIndexCollection>
+ <fsIndexes>
+ <fsIndexDescription>
+ <label>KnownPHIIndex</label>
+ <typeName>org.spin.scrubber.uima.type.KnownPHI</typeName>
+ <kind>sorted</kind>
+ <keys>
+ <fsIndexKey>
+ <featureName>begin</featureName>
+ <comparator>standard</comparator>
+ </fsIndexKey>
+ </keys>
+ </fsIndexDescription>
+ </fsIndexes>
+ </fsIndexCollection>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs>
+ <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+ <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.KnownPHI</type>
+ </outputs>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+ <outputsNewCASes>true</outputsNewCASes>
+ </operationalProperties>
+ </processingResourceMetaData>
+ <resourceManagerConfiguration/>
+</collectionReaderDescription>
Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain