You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2014/11/07 17:02:42 UTC
svn commit: r1637395 - in /ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine: AggregatePlaintextFastUMLSProcessor.xml AggregatePlaintextUMLSProcessor.xml

Author: chenpei
Date: Fri Nov  7 16:02:42 2014
New Revision: 1637395

URL: http://svn.apache.org/r1637395
Log:
CTAKES-325 - Creating a separate AggregatePlaintextFastUMLSProcessor.xml that uses the dictionary-lookup-fast module.

Added:
    ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml   (with props)
Modified:
    ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml

Added: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml?rev=1637395&view=auto
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml (added)
+++ ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml Fri Nov  7 16:02:42 2014
@@ -0,0 +1,259 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>false</primitive>
+  <delegateAnalysisEngineSpecifiers>
+    <delegateAnalysisEngine key="Chunker">
+      <import location="../../../ctakes-chunker/desc/Chunker.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="TokenizerAnnotator">
+      <import location="../../../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="ContextDependentTokenizerAnnotator">
+      <import location="../../../ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
+       <import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="StatusAnnotator">
+      <import location="../../../ctakes-ne-contexts/desc/StatusAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="NegationAnnotator">
+      <import location="../../../ctakes-ne-contexts/desc/NegationAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="ExtractionPrepAnnotator">
+      <import location="ExtractionPrepAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="SentenceDetectorAnnotator">
+      <import location="../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml"/>
+    </delegateAnalysisEngine>
+     <!-- By default, the dictionary lookup window is Sentence.
+          The change was made in 3.2.1 because experiments showed that many terms were missed when relying upon the
+          accuracy of LookupWindowAnnotator to correctly identify all present full noun phrases.
+          Instead, reliance is now upon the fact that most terms in the dictionary itself are (or fit in) noun phrases.
+     To revert to LookupWindowAnnotation:
+       1.  uncomment the following lines to load the LookupWindowAnnotator,
+       2.  uncomment the LookupWindowAnnotator line in <fixedFlow>,
+       3.  uncomment the LookupWindowAnnotation line in <capability> <outputs> <type>
+       4.  in ctakes-dictionary-lookup-fast .. /desc/analysis_engine/UmlsLookupAnnotator.xml
+       switch the value for <nameValuePair> windowAnnotations.
+       LookupWindowAnnotation is still there, just commented
+       5.  also uncomment <capability> <inputs> <type> ... LookupWindowAnnotation in UmlsLookupAnnotator.xml
+       The AdjustNounPhrase*** annotators have been left in case another module needs them.
+       I leave it to somebody with more applicable knowledge to remove them from the flow.
+       -->
+    <!--<delegateAnalysisEngine key="LookupWindowAnnotator">-->
+      <!--<import location="LookupWindowAnnotator.xml"/>-->
+    <!--</delegateAnalysisEngine>-->
+    <delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingNP">
+      <import location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingNP.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingPPNP">
+      <import location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingPPNP.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="SimpleSegmentAnnotator">
+      <import location="SimpleSegmentAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="POSTagger">
+      <import location="../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
+    </delegateAnalysisEngine>
+	<!-- 
+	<delegateAnalysisEngine key="ClearPOSTagger">
+	<import location="../../../ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml"/>
+	</delegateAnalysisEngine>
+	 -->    
+    <delegateAnalysisEngine key="LvgAnnotator">
+      <import location="../../../ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml"/>
+    </delegateAnalysisEngine>
+<!--     
+    <delegateAnalysisEngine key="AssertionAnnotator">
+      <import location="../../../ctakes-assertion/desc/AssertionMiniPipelineAnalysisEngine.xml"/>
+    </delegateAnalysisEngine>
+ -->
+     <delegateAnalysisEngine key="GenericCleartkAnalysisEngine">
+      <import location="../../../ctakes-assertion/desc/analysis_engine/GenericCleartkAnalysisEngine.xml"/>
+     </delegateAnalysisEngine>
+     
+     <delegateAnalysisEngine key="HistoryCleartkAnalysisEngine">
+      <import location="../../../ctakes-assertion/desc/analysis_engine/HistoryCleartkAnalysisEngine.xml"/>
+     </delegateAnalysisEngine>
+     <delegateAnalysisEngine key="PolarityCleartkAnalysisEngine">
+      <import location="../../../ctakes-assertion/desc/analysis_engine/PolarityCleartkAnalysisEngine.xml"/>
+     </delegateAnalysisEngine>
+     <delegateAnalysisEngine key="SubjectCleartkAnalysisEngine">
+      <import location="../../../ctakes-assertion/desc/analysis_engine/SubjectCleartkAnalysisEngine.xml"/>
+     </delegateAnalysisEngine>
+     <delegateAnalysisEngine key="UncertaintyCleartkAnalysisEngine">
+      <import location="../../../ctakes-assertion/desc/analysis_engine/UncertaintyCleartkAnalysisEngine.xml"/>
+     </delegateAnalysisEngine>
+     
+    <delegateAnalysisEngine key="DependencyParser">
+      <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPDependencyParserAE.xml"/>
+    </delegateAnalysisEngine>
+<delegateAnalysisEngine key="SemanticRoleLabeler">
+<import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPSemanticRoleLabelerAE.xml"/>
+</delegateAnalysisEngine>    
+
+    <delegateAnalysisEngine key="ConstituencyParser">
+      <import location="../../../ctakes-constituency-parser/desc/ConstituencyParserAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    
+  </delegateAnalysisEngineSpecifiers>
+  <analysisEngineMetaData>
+    <name>AggregatePlaintextFastUMLSProcessor</name>
+    <description>Runs the complete pipeline for annotating clinical documents in plain text format using the built in UMLS (SNOMEDCT and RxNORM) dictionaries.  This uses the ctakes-dictionary-lookup-fast module (desc/analysis_engine/UmlsLookupAnnotator.xml)
+and requires an UMLS license.  Please configure your UMLS username and password for the ctakes-dictionary-lookup-fast module before running.
+</description>
+    <version/>
+    <vendor/>
+    <configurationParameters searchStrategy="language_fallback">
+      <configurationParameter>
+        <name>SegmentID</name>
+        <description/>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+        <overrides>
+          <parameter>SimpleSegmentAnnotator/SegmentID</parameter>
+        </overrides>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ChunkCreatorClass</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+        <overrides>
+          <parameter>Chunker/ChunkCreatorClass</parameter>
+        </overrides>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>ChunkCreatorClass</name>
+        <value>
+          <string>org.apache.ctakes.chunker.ae.PhraseTypeChunkCreator</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <flowConstraints>
+      <fixedFlow>
+        <node>SimpleSegmentAnnotator</node>
+        <node>SentenceDetectorAnnotator</node>
+        <node>TokenizerAnnotator</node>
+        <node>LvgAnnotator</node>
+        <node>ContextDependentTokenizerAnnotator</node>
+        <node>POSTagger</node>
+		<!-- <node>ClearPOSTagger</node>  -->        
+        <node>Chunker</node>
+        <node>AdjustNounPhraseToIncludeFollowingNP</node>
+        <node>AdjustNounPhraseToIncludeFollowingPPNP</node>
+        <!--<node>LookupWindowAnnotator</node>-->
+        <node>DictionaryLookupAnnotatorDB</node>
+        <node>DependencyParser</node>
+		<node>SemanticRoleLabeler</node>        
+		<node>ConstituencyParser</node>
+        <!-- <node>AssertionAnnotator</node> -->
+        <!-- <node>StatusAnnotator</node> -->
+       	<!-- <node>NegationAnnotator</node> -->
+       	<node>GenericCleartkAnalysisEngine</node>
+		<node>HistoryCleartkAnalysisEngine</node>
+		<node>PolarityCleartkAnalysisEngine</node>
+		<node>SubjectCleartkAnalysisEngine</node>
+		<node>UncertaintyCleartkAnalysisEngine</node>
+		    
+        <node>ExtractionPrepAnnotator</node>
+      </fixedFlow>
+    </flowConstraints>
+    <typePriorities>
+      <name>Ordering</name>
+      <description>For subiterator</description>
+      <version>1.0</version>
+      <priorityList>
+        <type>org.apache.ctakes.typesystem.type.textspan.Segment</type>
+        <type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+        <type>org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+      </priorityList>
+      <priorityList>
+        <type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+        <type>org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation</type>
+      </priorityList>
+    </typePriorities>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NewlineToken</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.VP</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.refsem.UmlsConcept</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.UCP</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.TimeAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.SymbolToken</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Segment</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.SBAR</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.RangeAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PunctuationToken</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.Property</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.Properties</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PRT</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PP</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.OntologyConcept</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NumToken</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NP</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation</type>
+          <!--<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation</type>-->
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.Lemma</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.LST</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.INTJ</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.FractionAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.structured.DocumentID</type>
+          <type allAnnotatorFeatures="true">uima.tcas.DocumentAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.DateAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.CopySrcAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.CopyDestAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.ContractionToken</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.ContextAnnotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.Chunk</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.CONJP</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+          <type allAnnotatorFeatures="true">uima.cas.AnnotationBase</type>
+          <type allAnnotatorFeatures="true">uima.tcas.Annotation</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.ADVP</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.ADJP</type>        
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Propchange: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml?rev=1637395&r1=1637394&r2=1637395&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml (original)
+++ ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml Fri Nov  7 16:02:42 2014
@@ -33,7 +33,7 @@
       <import location="../../../ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
-       <import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>
+      <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="StatusAnnotator">
       <import location="../../../ctakes-ne-contexts/desc/StatusAnnotator.xml"/>
@@ -47,24 +47,9 @@
     <delegateAnalysisEngine key="SentenceDetectorAnnotator">
       <import location="../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml"/>
     </delegateAnalysisEngine>
-     <!-- By default, the dictionary lookup window is Sentence.
-          The change was made in 3.2.1 because experiments showed that many terms were missed when relying upon the
-          accuracy of LookupWindowAnnotator to correctly identify all present full noun phrases.
-          Instead, reliance is now upon the fact that most terms in the dictionary itself are (or fit in) noun phrases.
-     To revert to LookupWindowAnnotation:
-       1.  uncomment the following lines to load the LookupWindowAnnotator,
-       2.  uncomment the LookupWindowAnnotator line in <fixedFlow>,
-       3.  uncomment the LookupWindowAnnotation line in <capability> <outputs> <type>
-       4.  in ctakes-dictionary-lookup-fast .. /desc/analysis_engine/UmlsLookupAnnotator.xml
-       switch the value for <nameValuePair> windowAnnotations.
-       LookupWindowAnnotation is still there, just commented
-       5.  also uncomment <capability> <inputs> <type> ... LookupWindowAnnotation in UmlsLookupAnnotator.xml
-       The AdjustNounPhrase*** annotators have been left in case another module needs them.
-       I leave it to somebody with more applicable knowledge to remove them from the flow.
-       -->
-    <!--<delegateAnalysisEngine key="LookupWindowAnnotator">-->
-      <!--<import location="LookupWindowAnnotator.xml"/>-->
-    <!--</delegateAnalysisEngine>-->
+    <delegateAnalysisEngine key="LookupWindowAnnotator">
+      <import location="LookupWindowAnnotator.xml"/>
+    </delegateAnalysisEngine>
     <delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingNP">
       <import location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingNP.xml"/>
     </delegateAnalysisEngine>
@@ -167,7 +152,7 @@ and requires an UMLS license.  Please up
         <node>Chunker</node>
         <node>AdjustNounPhraseToIncludeFollowingNP</node>
         <node>AdjustNounPhraseToIncludeFollowingPPNP</node>
-        <!--<node>LookupWindowAnnotator</node>-->
+        <node>LookupWindowAnnotator</node>
         <node>DictionaryLookupAnnotatorDB</node>
         <node>DependencyParser</node>
 		<node>SemanticRoleLabeler</node>