You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ma...@apache.org on 2013/01/18 22:12:56 UTC

svn commit: r1435343 - in /incubator/ctakes/trunk/ctakes-clinical-pipeline/desc: analysis_engine/assertion_training_steps/ collection_processing_engine/assertion_training_steps/

Author: mattcoarr
Date: Fri Jan 18 21:12:56 2013
New Revision: 1435343

URL: http://svn.apache.org/viewvc?rev=1435343&view=rev
Log:
Added analysis engine and CPE descriptors to run XMI files that include only
gold entity/event mentions through the rest of cTAKES (but not the dictionary
lookup or assertion modules).

Added:
    incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/assertion_training_steps/
    incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/assertion_training_steps/AggregatePlaintextUMLSProcessor__no_dictionary_no_assertion.xml
      - copied, changed from r1435232, incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
    incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/collection_processing_engine/assertion_training_steps/
    incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/collection_processing_engine/assertion_training_steps/process_gold_assertion_data.xml

Copied: incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/assertion_training_steps/AggregatePlaintextUMLSProcessor__no_dictionary_no_assertion.xml (from r1435232, incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml)
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/assertion_training_steps/AggregatePlaintextUMLSProcessor__no_dictionary_no_assertion.xml?p2=incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/assertion_training_steps/AggregatePlaintextUMLSProcessor__no_dictionary_no_assertion.xml&p1=incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml&r1=1435232&r2=1435343&rev=1435343&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml (original)
+++ incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/assertion_training_steps/AggregatePlaintextUMLSProcessor__no_dictionary_no_assertion.xml Fri Jan 18 21:12:56 2013
@@ -19,57 +19,68 @@
     under the License.
 
 -->
+<!--
+  This is a version of the clinical document pipeline used for pre-processing
+  data before it is run through the assertion training and evaluation process.
+  The input files should be XMI files converted from Knowtator XML.
+-->
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
   <primitive>false</primitive>
   <delegateAnalysisEngineSpecifiers>
     <delegateAnalysisEngine key="Chunker">
-      <import location="../../../ctakes-chunker/desc/Chunker.xml"/>
+      <import location="../../../../ctakes-chunker/desc/Chunker.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="TokenizerAnnotator">
-      <import location="../../../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml"/>
+      <import location="../../../../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="ContextDependentTokenizerAnnotator">
-      <import location="../../../ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml"/>
+      <import location="../../../../ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml"/>
     </delegateAnalysisEngine>
+    <!--
     <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
-      <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
+      <import location="../../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
     </delegateAnalysisEngine>
+    -->
+    <!--
     <delegateAnalysisEngine key="StatusAnnotator">
-      <import location="../../../ctakes-ne-contexts/desc/StatusAnnotator.xml"/>
+      <import location="../../../../ctakes-ne-contexts/desc/StatusAnnotator.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="NegationAnnotator">
-      <import location="../../../ctakes-ne-contexts/desc/NegationAnnotator.xml"/>
+      <import location="../../../../ctakes-ne-contexts/desc/NegationAnnotator.xml"/>
     </delegateAnalysisEngine>
+    -->
     <delegateAnalysisEngine key="ExtractionPrepAnnotator">
-      <import location="ExtractionPrepAnnotator.xml"/>
+      <import location="../ExtractionPrepAnnotator.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="SentenceDetectorAnnotator">
-      <import location="../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml"/>
+      <import location="../../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="LookupWindowAnnotator">
-      <import location="LookupWindowAnnotator.xml"/>
+      <import location="../LookupWindowAnnotator.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingNP">
-      <import location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingNP.xml"/>
+      <import location="../../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingNP.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingPPNP">
-      <import location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingPPNP.xml"/>
+      <import location="../../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingPPNP.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="SimpleSegmentAnnotator">
-      <import location="SimpleSegmentAnnotator.xml"/>
+      <import location="../SimpleSegmentAnnotator.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="POSTagger">
-      <import location="../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
+      <import location="../../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="LvgAnnotator">
-      <import location="../../../ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml"/>
+      <import location="../../../../ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml"/>
     </delegateAnalysisEngine>
+    <!--
     <delegateAnalysisEngine key="AssertionAnnotator">
-      <import location="../../../ctakes-assertion/desc/AssertionMiniPipelineAnalysisEngine.xml"/>
+      <import location="../../../../ctakes-assertion/desc/AssertionMiniPipelineAnalysisEngine.xml"/>
     </delegateAnalysisEngine>
+    -->
     <delegateAnalysisEngine key="DependencyParser">
-      <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearParserDependencyParserAE.xml"/>
+      <import location="../../../../ctakes-dependency-parser/desc/analysis_engine/ClearParserDependencyParserAE.xml"/>
     </delegateAnalysisEngine>
   </delegateAnalysisEngineSpecifiers>
   <analysisEngineMetaData>
@@ -120,9 +131,9 @@ and requires an UMLS license.  Please up
         <node>AdjustNounPhraseToIncludeFollowingNP</node>
         <node>AdjustNounPhraseToIncludeFollowingPPNP</node>
         <node>LookupWindowAnnotator</node>
-        <node>DictionaryLookupAnnotatorDB</node>
+        <!-- <node>DictionaryLookupAnnotatorDB</node> -->
         <node>DependencyParser</node>
-        <node>AssertionAnnotator</node>
+        <!-- <node>AssertionAnnotator</node> -->
         <!-- 
         	<node>StatusAnnotator</node>
         	<node>NegationAnnotator</node>

Added: incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/collection_processing_engine/assertion_training_steps/process_gold_assertion_data.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/collection_processing_engine/assertion_training_steps/process_gold_assertion_data.xml?rev=1435343&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/collection_processing_engine/assertion_training_steps/process_gold_assertion_data.xml (added)
+++ incubator/ctakes/trunk/ctakes-clinical-pipeline/desc/collection_processing_engine/assertion_training_steps/process_gold_assertion_data.xml Fri Jan 18 21:12:56 2013
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<cpeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <collectionReader>
+        <collectionIterator>
+            <descriptor>
+                <import location="../../../../ctakes-core/desc/collection_reader/XmiCollectionReaderCtakes.xml"/>
+            </descriptor>
+            <configurationParameterSettings>
+                <nameValuePair>
+                    <name>InputDirectory</name>
+                    <value>
+                        <string>/tmp/assertion_data/input</string>
+                    </value>
+                </nameValuePair>
+            </configurationParameterSettings>
+        </collectionIterator>
+    </collectionReader>
+    <casProcessors casPoolSize="3" processingUnitThreadCount="1">
+        <casProcessor deployment="integrated" name="AggregatePlaintextUMLSProcessor">
+            <descriptor>
+                <import location="../../analysis_engine/assertion_training_steps/AggregatePlaintextUMLSProcessor__no_dictionary_no_assertion.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+        <casProcessor deployment="integrated" name="XMI Writer CAS Consumer">
+            <descriptor>
+                <import location="../../../../ctakes-core/desc/cas_consumer/__XmiWriterCasConsumer.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+            <configurationParameterSettings>
+                <nameValuePair>
+                    <name>OutputDirectory</name>
+                    <value>
+                        <string>/tmp/assertion_data/output</string>
+                    </value>
+                </nameValuePair>
+            </configurationParameterSettings>
+        </casProcessor>
+    </casProcessors>
+    <cpeConfig>
+        <numToProcess>-1</numToProcess>
+        <deployAs>immediate</deployAs>
+        <checkpoint batch="0" time="300000ms"/>
+        <timerImpl/>
+    </cpeConfig>
+</cpeDescription>