You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by br...@apache.org on 2013/07/07 21:24:12 UTC

svn commit: r1500512 - in /ctakes/sandbox/ctakes-scrubber-deid/desc/reader: TEMPLATE.reader_files_pubs.xml TEMPLATE.reader_files_test.xml TEMPLATE.reader_files_train.xml

Author: brittfitch
Date: Sun Jul  7 19:24:12 2013
New Revision: 1500512

URL: http://svn.apache.org/r1500512
Log:
add uima reader template files

Added:
    ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml   (with props)

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml?rev=1500512&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml Sun Jul  7 19:24:12 2013
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <!-- NOTE(review): the $UIMA_READER_IMPL_PUBS and $DIR_INPUT_PUBS_PROCESSED values in this TEMPLATE file look like placeholders, presumably substituted before the descriptor is loaded; confirm. -->
+  <!-- TODO: use scrubber.properties -->
+  <implementationName>$UIMA_READER_IMPL_PUBS</implementationName>
+
+  <processingResourceMetaData>
+    <name>File System Collection Reader</name>
+    <description>Reads files from the filesystem.  This CollectionReader may be used
+          with or without a CAS Initializer.  If a CAS Initializer is supplied, it will
+          be passed an InputStream to the file and must populate the CAS from that
+          InputStream.  If no CAS Initializer is supplied, this CollectionReader will
+          read the file itself and treat the entire contents of the file as the
+          document to be inserted into the CAS.</description>
+    <version>1.0</version>
+    <vendor>The Apache Software Foundation</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>InputDirectory</name>
+        <description>Directory containing input files</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Encoding</name>
+        <description>Character encoding for the documents.  If not specified,
+                   the default system encoding will be used.  Note that this parameter
+                   only applies if there is no CAS Initializer provided; otherwise,
+                   it is the CAS Initializer's responsibility to deal with character
+                   encoding issues.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Language</name>
+        <description>ISO language code for the documents</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>BrowseSubdirectories</name>
+        <description>True means include files of subdirectories, recursively, of the input directory.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>InputDirectory</name>
+        <value>
+          <string>$DIR_INPUT_PUBS_PROCESSED</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>BrowseSubdirectories</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+        <!-- TODO: rename KnownPHI to Human Annotation -->
+        <import location="../type/KnownPHITypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection>
+      <fsIndexes>
+        <fsIndexDescription>
+          <label>KnownPHIIndex</label>
+          <typeName>org.spin.scrubber.uima.type.KnownPHI</typeName>
+          <kind>sorted</kind>
+          <keys>
+            <fsIndexKey>
+              <featureName>begin</featureName>
+              <comparator>standard</comparator>
+            </fsIndexKey>
+          </keys>
+        </fsIndexDescription>
+      </fsIndexes>
+    </fsIndexCollection>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+          <!-- TODO: rename KnownPHI to Human Annotation -->
+          <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.KnownPHI</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration/>
+</collectionReaderDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_pubs.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml?rev=1500512&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml Sun Jul  7 19:24:12 2013
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>$UIMA_READER_IMPL_TEST</implementationName> <!-- NOTE(review): $UIMA_READER_IMPL_TEST and $DIR_INPUT_TEST look like template placeholders, presumably substituted before loading; confirm. -->
+  <processingResourceMetaData>
+    <name>File System Collection Reader</name>
+    <description>Reads files from the filesystem.  This CollectionReader may be used
+          with or without a CAS Initializer.  If a CAS Initializer is supplied, it will
+          be passed an InputStream to the file and must populate the CAS from that
+          InputStream.  If no CAS Initializer is supplied, this CollectionReader will
+          read the file itself and treat the entire contents of the file as the
+          document to be inserted into the CAS.</description>
+    <version>1.0</version>
+    <vendor>The Apache Software Foundation</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>KnownPHINodeList</name>
+        <description>List of XPaths to specific fields known to contain ONLY PHI.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ScrubNodeList</name>
+        <description>List of XPaths to scrub</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>InputDirectory</name>
+        <description>Directory containing input files</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Encoding</name>
+        <description>Character encoding for the documents.  If not specified,
+                   the default system encoding will be used.  Note that this parameter
+                   only applies if there is no CAS Initializer provided; otherwise,
+                   it is the CAS Initializer's responsibility to deal with character
+                   encoding issues.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Language</name>
+        <description>ISO language code for the documents</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>BrowseSubdirectories</name>
+        <description>True means include files of subdirectories, recursively, of the input directory.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>ScrubNodeList</name>
+        <value>
+          <array>
+            <string>/Envelope/Body/PathologyCase/FullReportData</string>
+            <string>/Envelope/Body/PathologyCase/FullReportText</string>
+            <string>/Envelope/Body/PathologyCase/GrossDescriptionText</string>
+            <string>/Envelope/Body/PathologyCase/DiagnosisText</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>KnownPHINodeList</name>
+        <value>
+          <array>
+            <string>/Envelope/Header/Identifiers/FirstName</string>
+            <string>/Envelope/Header/Identifiers/LastName</string>
+            <string>/Envelope/Header/Identifiers/DateOfBirth</string>
+            <string>/Envelope/Header/Identifiers/SSN</string>
+            <string>/Envelope/Header/Identifiers/AccessionNumber</string>
+            <string>/Envelope/Header/Identifiers/LocalMRN</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>InputDirectory</name>
+        <value>
+          <string>$DIR_INPUT_TEST</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>BrowseSubdirectories</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+        <import location="../type/KnownPHITypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection>
+      <fsIndexes>
+        <fsIndexDescription>
+          <label>KnownPHIIndex</label>
+          <typeName>org.spin.scrubber.uima.type.KnownPHI</typeName>
+          <kind>sorted</kind>
+          <keys>
+            <fsIndexKey>
+              <featureName>begin</featureName>
+              <comparator>standard</comparator>
+            </fsIndexKey>
+          </keys>
+        </fsIndexDescription>
+      </fsIndexes>
+    </fsIndexCollection>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+          <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.KnownPHI</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration/>
+</collectionReaderDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_test.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml?rev=1500512&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml Sun Jul  7 19:24:12 2013
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>$UIMA_READER_IMPL_TRAIN</implementationName> <!-- NOTE(review): $UIMA_READER_IMPL_TRAIN and $DIR_INPUT_TRAIN look like template placeholders, presumably substituted before loading; confirm. -->
+  <processingResourceMetaData>
+    <name>File System Collection Reader</name>
+    <description>Reads files from the filesystem.  This CollectionReader may be used
+          with or without a CAS Initializer.  If a CAS Initializer is supplied, it will
+          be passed an InputStream to the file and must populate the CAS from that
+          InputStream.  If no CAS Initializer is supplied, this CollectionReader will
+          read the file itself and treat the entire contents of the file as the
+          document to be inserted into the CAS.</description>
+    <version>1.0</version>
+    <vendor>The Apache Software Foundation</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>KnownPHINodeList</name>
+        <description>List of XPaths to specific fields known to contain ONLY PHI.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ScrubNodeList</name>
+        <description>List of XPaths to scrub</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>InputDirectory</name>
+        <description>Directory containing input files</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Encoding</name>
+        <description>Character encoding for the documents.  If not specified,
+                   the default system encoding will be used.  Note that this parameter
+                   only applies if there is no CAS Initializer provided; otherwise,
+                   it is the CAS Initializer's responsibility to deal with character
+                   encoding issues.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Language</name>
+        <description>ISO language code for the documents</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>BrowseSubdirectories</name>
+        <description>True means include files of subdirectories, recursively, of the input directory.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>ScrubNodeList</name>
+        <value>
+          <array>
+            <string>/Envelope/Body/PathologyCase/FullReportData</string>
+            <string>/Envelope/Body/PathologyCase/FullReportText</string>
+            <string>/Envelope/Body/PathologyCase/GrossDescriptionText</string>
+            <string>/Envelope/Body/PathologyCase/DiagnosisText</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>KnownPHINodeList</name>
+        <value>
+          <array>
+            <string>/Envelope/Header/Identifiers/FirstName</string>
+            <string>/Envelope/Header/Identifiers/LastName</string>
+            <string>/Envelope/Header/Identifiers/DateOfBirth</string>
+            <string>/Envelope/Header/Identifiers/SSN</string>
+            <string>/Envelope/Header/Identifiers/AccessionNumber</string>
+            <string>/Envelope/Header/Identifiers/LocalMRN</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>InputDirectory</name>
+        <value>
+          <string>$DIR_INPUT_TRAIN</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>BrowseSubdirectories</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+        <import location="../type/KnownPHITypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection>
+      <fsIndexes>
+        <fsIndexDescription>
+          <label>KnownPHIIndex</label>
+          <typeName>org.spin.scrubber.uima.type.KnownPHI</typeName>
+          <kind>sorted</kind>
+          <keys>
+            <fsIndexKey>
+              <featureName>begin</featureName>
+              <comparator>standard</comparator>
+            </fsIndexKey>
+          </keys>
+        </fsIndexDescription>
+      </fsIndexes>
+    </fsIndexCollection>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+          <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.KnownPHI</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration/>
+</collectionReaderDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/TEMPLATE.reader_files_train.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain