You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2014/11/07 17:02:42 UTC
svn commit: r1637395 - in
/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine:
AggregatePlaintextFastUMLSProcessor.xml AggregatePlaintextUMLSProcessor.xml
Author: chenpei
Date: Fri Nov 7 16:02:42 2014
New Revision: 1637395
URL: http://svn.apache.org/r1637395
Log:
CTAKES-325 - Creating a separate AggregatePlaintextFastUMLSProcessor.xml that uses the dictionary-lookup-fast module.
Added:
ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml (with props)
Modified:
ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
Added: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml?rev=1637395&view=auto
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml (added)
+++ ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml Fri Nov 7 16:02:42 2014
@@ -0,0 +1,259 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>false</primitive>
+ <delegateAnalysisEngineSpecifiers>
+ <delegateAnalysisEngine key="Chunker">
+ <import location="../../../ctakes-chunker/desc/Chunker.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="TokenizerAnnotator">
+ <import location="../../../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="ContextDependentTokenizerAnnotator">
+ <import location="../../../ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
+ <import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="StatusAnnotator">
+ <import location="../../../ctakes-ne-contexts/desc/StatusAnnotator.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="NegationAnnotator">
+ <import location="../../../ctakes-ne-contexts/desc/NegationAnnotator.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="ExtractionPrepAnnotator">
+ <import location="ExtractionPrepAnnotator.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="SentenceDetectorAnnotator">
+ <import location="../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml"/>
+ </delegateAnalysisEngine>
+ <!-- By default, the dictionary lookup window is Sentence.
+ The change was made in 3.2.1 because experiments showed that many terms were missed when relying upon the
+ accuracy of LookupWindowAnnotator to correctly identify all present full noun phrases.
+ Instead, reliance is now upon the fact that most terms in the dictionary itself are (or fit in) noun phrases.
+ To revert to LookupWindowAnnotation:
+ 1. uncomment the following lines to load the LookupWindowAnnotator,
+ 2. uncomment the LookupWindowAnnotator line in <fixedFlow>,
+ 3. uncomment the LookupWindowAnnotation line in <capability> <outputs> <type>
+ 4. in ctakes-dictionary-lookup-fast .. /desc/analysis_engine/UmlsLookupAnnotator.xml
+ switch the value for <nameValuePair> windowAnnotations.
+ LookupWindowAnnotation is still there, just commented
+ 5. also uncomment <capability> <inputs> <type> ... LookupWindowAnnotation in UmlsLookupAnnotator.xml
+ The AdjustNounPhrase*** annotators have been left in case another module needs them.
+ I leave it to somebody with more applicable knowledge to remove them from the flow.
+ -->
+ <!--<delegateAnalysisEngine key="LookupWindowAnnotator">-->
+ <!--<import location="LookupWindowAnnotator.xml"/>-->
+ <!--</delegateAnalysisEngine>-->
+ <delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingNP">
+ <import location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingNP.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingPPNP">
+ <import location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingPPNP.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="SimpleSegmentAnnotator">
+ <import location="SimpleSegmentAnnotator.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="POSTagger">
+ <import location="../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
+ </delegateAnalysisEngine>
+ <!--
+ <delegateAnalysisEngine key="ClearPOSTagger">
+ <import location="../../../ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml"/>
+ </delegateAnalysisEngine>
+ -->
+ <delegateAnalysisEngine key="LvgAnnotator">
+ <import location="../../../ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml"/>
+ </delegateAnalysisEngine>
+<!--
+ <delegateAnalysisEngine key="AssertionAnnotator">
+ <import location="../../../ctakes-assertion/desc/AssertionMiniPipelineAnalysisEngine.xml"/>
+ </delegateAnalysisEngine>
+ -->
+ <delegateAnalysisEngine key="GenericCleartkAnalysisEngine">
+ <import location="../../../ctakes-assertion/desc/analysis_engine/GenericCleartkAnalysisEngine.xml"/>
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="HistoryCleartkAnalysisEngine">
+ <import location="../../../ctakes-assertion/desc/analysis_engine/HistoryCleartkAnalysisEngine.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="PolarityCleartkAnalysisEngine">
+ <import location="../../../ctakes-assertion/desc/analysis_engine/PolarityCleartkAnalysisEngine.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="SubjectCleartkAnalysisEngine">
+ <import location="../../../ctakes-assertion/desc/analysis_engine/SubjectCleartkAnalysisEngine.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="UncertaintyCleartkAnalysisEngine">
+ <import location="../../../ctakes-assertion/desc/analysis_engine/UncertaintyCleartkAnalysisEngine.xml"/>
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="DependencyParser">
+ <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPDependencyParserAE.xml"/>
+ </delegateAnalysisEngine>
+<delegateAnalysisEngine key="SemanticRoleLabeler">
+<import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPSemanticRoleLabelerAE.xml"/>
+</delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="ConstituencyParser">
+ <import location="../../../ctakes-constituency-parser/desc/ConstituencyParserAnnotator.xml"/>
+ </delegateAnalysisEngine>
+
+ </delegateAnalysisEngineSpecifiers>
+ <analysisEngineMetaData>
+ <name>AggregatePlaintextFastUMLSProcessor</name>
+ <description>Runs the complete pipeline for annotating clinical documents in plain text format using the built in UMLS (SNOMEDCT and RxNORM) dictionaries. This uses ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml
+and requires a UMLS license. Please make sure your UMLS username and password are configured for the ctakes-dictionary-lookup-fast module before running.
+</description>
+ <version/>
+ <vendor/>
+ <configurationParameters searchStrategy="language_fallback">
+ <configurationParameter>
+ <name>SegmentID</name>
+ <description/>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ <overrides>
+ <parameter>SimpleSegmentAnnotator/SegmentID</parameter>
+ </overrides>
+ </configurationParameter>
+ <configurationParameter>
+ <name>ChunkCreatorClass</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ <overrides>
+ <parameter>Chunker/ChunkCreatorClass</parameter>
+ </overrides>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>ChunkCreatorClass</name>
+ <value>
+ <string>org.apache.ctakes.chunker.ae.PhraseTypeChunkCreator</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <flowConstraints>
+ <fixedFlow>
+ <node>SimpleSegmentAnnotator</node>
+ <node>SentenceDetectorAnnotator</node>
+ <node>TokenizerAnnotator</node>
+ <node>LvgAnnotator</node>
+ <node>ContextDependentTokenizerAnnotator</node>
+ <node>POSTagger</node>
+ <!-- <node>ClearPOSTagger</node> -->
+ <node>Chunker</node>
+ <node>AdjustNounPhraseToIncludeFollowingNP</node>
+ <node>AdjustNounPhraseToIncludeFollowingPPNP</node>
+ <!--<node>LookupWindowAnnotator</node>-->
+ <node>DictionaryLookupAnnotatorDB</node>
+ <node>DependencyParser</node>
+ <node>SemanticRoleLabeler</node>
+ <node>ConstituencyParser</node>
+ <!-- <node>AssertionAnnotator</node> -->
+ <!-- <node>StatusAnnotator</node> -->
+ <!-- <node>NegationAnnotator</node> -->
+ <node>GenericCleartkAnalysisEngine</node>
+ <node>HistoryCleartkAnalysisEngine</node>
+ <node>PolarityCleartkAnalysisEngine</node>
+ <node>SubjectCleartkAnalysisEngine</node>
+ <node>UncertaintyCleartkAnalysisEngine</node>
+
+ <node>ExtractionPrepAnnotator</node>
+ </fixedFlow>
+ </flowConstraints>
+ <typePriorities>
+ <name>Ordering</name>
+ <description>For subiterator</description>
+ <version>1.0</version>
+ <priorityList>
+ <type>org.apache.ctakes.typesystem.type.textspan.Segment</type>
+ <type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+ <type>org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+ </priorityList>
+ <priorityList>
+ <type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+ <type>org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation</type>
+ </priorityList>
+ </typePriorities>
+ <fsIndexCollection/>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NewlineToken</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.VP</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.refsem.UmlsConcept</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.UCP</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.TimeAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.SymbolToken</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Segment</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.SBAR</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.RangeAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PunctuationToken</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.Property</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.Properties</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PRT</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PP</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.OntologyConcept</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NumToken</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NP</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation</type>
+ <!--<type allAnnotatorFeatures="true">edu.mayo.bmi.uima.lookup.type.LookupWindowAnnotation</type>-->
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.Lemma</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.LST</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.INTJ</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.FractionAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.structured.DocumentID</type>
+ <type allAnnotatorFeatures="true">uima.tcas.DocumentAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.DateAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.CopySrcAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.CopyDestAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.ContractionToken</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.ContextAnnotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.Chunk</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.CONJP</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+ <type allAnnotatorFeatures="true">uima.cas.AnnotationBase</type>
+ <type allAnnotatorFeatures="true">uima.tcas.Annotation</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.ADVP</type>
+ <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.ADJP</type>
+ </outputs>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+ <resourceManagerConfiguration/>
+</analysisEngineDescription>
Propchange: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextFastUMLSProcessor.xml
------------------------------------------------------------------------------
svn:eol-style = native
Modified: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml?rev=1637395&r1=1637394&r2=1637395&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml (original)
+++ ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml Fri Nov 7 16:02:42 2014
@@ -33,7 +33,7 @@
<import location="../../../ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
- <import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>
+ <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="StatusAnnotator">
<import location="../../../ctakes-ne-contexts/desc/StatusAnnotator.xml"/>
@@ -47,24 +47,9 @@
<delegateAnalysisEngine key="SentenceDetectorAnnotator">
<import location="../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml"/>
</delegateAnalysisEngine>
- <!-- By default, the dictionary lookup window is Sentence.
- The change was made in 3.2.1 because experiments showed that many terms were missed when relying upon the
- accuracy of LookupWindowAnnotator to correctly identify all present full noun phrases.
- Instead, reliance is now upon the fact that most terms in the dictionary itself are (or fit in) noun phrases.
- To revert to LookupWindowAnnotation:
- 1. uncomment the following lines to load the LookupWindowAnnotator,
- 2. uncomment the LookupWindowAnnotator line in <fixedFlow>,
- 3. uncomment the LookupWindowAnnotation line in <capability> <outputs> <type>
- 4. in ctakes-dictionary-lookup-fast .. /desc/analysis_engine/UmlsLookupAnnotator.xml
- switch the value for <nameValuePair> windowAnnotations.
- LookupWindowAnnotation is still there, just commented
- 5. also uncomment <capability> <inputs> <type> ... LookupWindowAnnotation in UmlsLookupAnnotator.xml
- The AdjustNounPhrase*** annotators have been left in case another module needs them.
- I leave it to somebody with more applicable knowledge to remove them from the flow.
- -->
- <!--<delegateAnalysisEngine key="LookupWindowAnnotator">-->
- <!--<import location="LookupWindowAnnotator.xml"/>-->
- <!--</delegateAnalysisEngine>-->
+ <delegateAnalysisEngine key="LookupWindowAnnotator">
+ <import location="LookupWindowAnnotator.xml"/>
+ </delegateAnalysisEngine>
<delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingNP">
<import location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingNP.xml"/>
</delegateAnalysisEngine>
@@ -167,7 +152,7 @@ and requires an UMLS license. Please up
<node>Chunker</node>
<node>AdjustNounPhraseToIncludeFollowingNP</node>
<node>AdjustNounPhraseToIncludeFollowingPPNP</node>
- <!--<node>LookupWindowAnnotator</node>-->
+ <node>LookupWindowAnnotator</node>
<node>DictionaryLookupAnnotatorDB</node>
<node>DependencyParser</node>
<node>SemanticRoleLabeler</node>