You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ma...@apache.org on 2013/01/25 19:29:50 UTC

svn commit: r1438632 - in /incubator/ctakes/trunk/ctakes-dictionary-lookup: ./ desc/analysis_engine/ doc/ doc/assertion_cue_phrase_index/ doc/assertion_cue_phrase_index/psv/ src/main/java/org/apache/ctakes/dictionary/assertion/ src/main/java/org/apache...

Author: mattcoarr
Date: Fri Jan 25 18:29:49 2013
New Revision: 1438632

URL: http://svn.apache.org/viewvc?rev=1438632&view=rev
Log:
adding new analysis engine and lucene index (along with source resources to build the lucene index) for assertion cue phrase lookup

Added:
    incubator/ctakes/trunk/ctakes-dictionary-lookup/desc/analysis_engine/AssertionCuePhraseDictionaryLookupAnnotator.xml
    incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/
    incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/
    incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/README
    incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/
    incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/conditional.psv
    incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/hypothetical.psv
    incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/negation_cue_class.psv
    incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/not_patient_cue_list.psv
    incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/speculation.psv
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/CreateAssertionLuceneIndexFromDelimitedFile.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/package-info.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/AssertionCuePhraseConsumerImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionLookupDesc.xml
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fdt   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fdx   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fnm   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.si   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.frq   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.prx   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.tim   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.tip   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_nrm.cfe   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_nrm.cfs   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/segments.gen   (with props)
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/segments_1   (with props)
Modified:
    incubator/ctakes/trunk/ctakes-dictionary-lookup/pom.xml

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/desc/analysis_engine/AssertionCuePhraseDictionaryLookupAnnotator.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/desc/analysis_engine/AssertionCuePhraseDictionaryLookupAnnotator.xml?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/desc/analysis_engine/AssertionCuePhraseDictionaryLookupAnnotator.xml (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/desc/analysis_engine/AssertionCuePhraseDictionaryLookupAnnotator.xml Fri Jan 25 18:29:49 2013
@@ -0,0 +1,117 @@
+<?xml version="1.0" encoding="UTF-8"?><taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.ctakes.dictionary.lookup.ae.DictionaryLookupAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>AssertionCuePhraseDictionaryLookupAnnotator</name>
+    <description/>
+    <version/>
+    <vendor/>
+    <configurationParameters>
+      <configurationParameter>
+        <name>maxListSize</name>
+        <description>Specifies the maximum number of items to be returned from a Lucene query.</description>
+        <type>Integer</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>maxListSize</name>
+        <value>
+          <integer>2147483647</integer>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription/>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation</type>
+        </inputs>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <externalResourceDependencies>
+    <externalResourceDependency>
+      <key>AssertionCuePhraseIndexReader</key>
+      <description/>
+      <interfaceName>org.apache.ctakes.core.resource.LuceneIndexReaderResource</interfaceName>
+      <optional>false</optional>
+    </externalResourceDependency>
+  </externalResourceDependencies>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <externalResource>
+        <name>AssertionCuePhraseIndex</name>
+        <description/>
+        <configurableDataResourceSpecifier>
+          <url/>
+          <resourceMetaData>
+            <name/>
+            <configurationParameters>
+              <configurationParameter>
+                <name>UseMemoryIndex</name>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+              </configurationParameter>
+              <configurationParameter>
+                <name>IndexDirectory</name>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+              </configurationParameter>
+            </configurationParameters>
+            <configurationParameterSettings>
+              <nameValuePair>
+                <name>UseMemoryIndex</name>
+                <value>
+                  <boolean>true</boolean>
+                </value>
+              </nameValuePair>
+              <nameValuePair>
+                <name>IndexDirectory</name>
+                <value>
+                  <string>org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index</string>
+                </value>
+              </nameValuePair>
+            </configurationParameterSettings>
+          </resourceMetaData>
+        </configurableDataResourceSpecifier>
+        <implementationName>org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl</implementationName>
+      </externalResource>
+      <externalResource>
+        <name>AssertionCuePhraseLookupDescriptorFile</name>
+        <description/>
+        <fileResourceSpecifier>
+          <fileUrl>file:org/apache/ctakes/dictionary/lookup/AssertionLookupDesc.xml</fileUrl>
+        </fileResourceSpecifier>
+        <implementationName>org.apache.ctakes.core.resource.FileResourceImpl</implementationName>
+      </externalResource>
+    </externalResources>
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>LookupDescriptor</key>
+        <resourceName>AssertionCuePhraseLookupDescriptorFile</resourceName>
+      </externalResourceBinding>
+      <externalResourceBinding>
+        <key>AssertionCuePhraseIndexReader</key>
+        <resourceName>AssertionCuePhraseIndex</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
+</taeDescription>

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/README
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/README?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/README (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/README Fri Jan 25 18:29:49 2013
@@ -0,0 +1,19 @@
+This directory contains the resources needed to create the Lucene index for assertion cue phrase lookup.
+
+To build the index, run the class (in src/main/java):
+  org.apache.ctakes.dictionary.assertion.CreateAssertionLuceneIndexFromDelimitedFile
+
+The files in the psv/ subdirectory are pipe-delimited files with the following format:
+
+  cue_phrase|cue_phrase_category|cue_phrase_family
+
+The "cue phrase" is the string itself.  For example, "no evidence of".
+
+The "cue phrase category" is something like -- no, not, speculation_low, speculation_high, etc.
+
+The "cue_phrase_family" is the general category of cue phrase.  Generally this aligns with the assertion attribute being set:
+  * negation (for the polarity attribute)
+  * uncertainty
+  * subject
+  * generic
+

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/conditional.psv
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/conditional.psv?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/conditional.psv (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/conditional.psv Fri Jan 25 18:29:49 2013
@@ -0,0 +1,13 @@
+on exertion|conditional|conditional
+on exercise|conditional|conditional
+with exertion|conditional|conditional
+with exercise|conditional|conditional
+exertional|conditional|conditional
+allergy|conditional|conditional
+allergies|conditional|conditional
+allergic to|conditional|conditional
+intolerance|conditional|conditional
+after|conditional|conditional
+post|conditional|conditional
+while|conditional|conditional
+when|conditional|conditional

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/hypothetical.psv
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/hypothetical.psv?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/hypothetical.psv (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/hypothetical.psv Fri Jan 25 18:29:49 2013
@@ -0,0 +1,38 @@
+avoid|hypothetical|hypothetical
+avoiding|hypothetical|hypothetical
+avoidance|hypothetical|hypothetical
+if|hypothetical|hypothetical
+unless|hypothetical|hypothetical
+monitor for|hypothetical|hypothetical
+monitors for|hypothetical|hypothetical
+monitored for|hypothetical|hypothetical
+monitoring for|hypothetical|hypothetical
+prn|hypothetical|hypothetical
+PRN|hypothetical|hypothetical
+p.r.n.|hypothetical|hypothetical
+P.R.N.|hypothetical|hypothetical
+as needed for|hypothetical|hypothetical
+as necessary for |hypothetical|hypothetical
+call for|hypothetical|hypothetical
+call with|hypothetical|hypothetical
+return for|hypothetical|hypothetical
+return with|hypothetical|hypothetical
+return|hypothetical|hypothetical
+watch for|hypothetical|hypothetical
+report new|hypothetical|hypothetical
+potential|hypothetical|hypothetical
+potentially|hypothetical|hypothetical
+instructions for|hypothetical|hypothetical
+prevent|hypothetical|hypothetical
+preventing|hypothetical|hypothetical
+prevention|hypothetical|hypothetical
+risk|hypothetical|hypothetical
+risk stratification for|hypothetical|hypothetical
+future|hypothetical|hypothetical
+immunization against|hypothetical|hypothetical
+should|hypothetical|hypothetical
+should he experience|hypothetical|hypothetical
+should she experience|hypothetical|hypothetical
+should the patient experience|hypothetical|hypothetical
+prophylaxis|hypothetical|hypothetical
+prophylactic|hypothetical|hypothetical

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/negation_cue_class.psv
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/negation_cue_class.psv?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/negation_cue_class.psv (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/negation_cue_class.psv Fri Jan 25 18:29:49 2013
@@ -0,0 +1,149 @@
+not|not|negation
+n't|not|negation
+&apos;t|not|negation
+didn't|not|negation
+doesn't|not|negation
+don't|not|negation
+isn't|not|negation
+aren't|not|negation
+wasn't|not|negation
+weren't|not|negation
+hadn't|not|negation
+hasn't|not|negation
+haven't|not|negation
+won't|not|negation
+wouldn't|not|negation
+never|not|negation
+no longer|not|negation
+in no way|not|negation
+neither|neither|negation
+nor|neither|negation
+none|none|negation
+nobody|none|negation
+no one|none|negation
+nowhere|none|negation
+no place|none|negation
+no|no|negation
+without|no|negation
+absence of|no|negation
+free of|no|negation
+clear of|no|negation
+clear to|no|negation
+negative for|no|negation
+neg for|no|negation
+negative for|no|negation
+unremarkable for|no|negation
+unassociated with|no|negation
+( - )|no|negation
+non|no|negation
+lack of|no|negation
+lack|no|negation
+lacked|no|negation
+lacks|no|negation
+lacking|no|negation
+impossible|fail|negation
+cannot|fail|negation
+can't|fail|negation
+couldn't|fail|negation
+inability|fail|negation
+unable|fail|negation
+forget|fail|negation
+forgets|fail|negation
+forgetting|fail|negation
+forgot|fail|negation
+forgotten|fail|negation
+fail|fail|negation
+failed|fail|negation
+failing|fail|negation
+fails|fail|negation
+failure|fail|negation
+free of|absent|negation
+free|absent|negation
+absent|absent|negation
+immune to|absent|negation
+denied|deny|negation
+denies|deny|negation
+deny|deny|negation
+denying|deny|negation
+avoided|avoid|negation
+avoids|avoid|negation
+prevented|avoid|negation
+prevents|avoid|negation
+prophylaxis against|avoid|negation
+shouldn't|avoid|negation
+mustn't|avoid|negation
+unwilling|decline|negation
+unwillingness|decline|negation
+reluctance|decline|negation
+reluctant|decline|negation
+decline|decline|negation
+declined|decline|negation
+declines|decline|negation
+declining|decline|negation
+refusal|decline|negation
+refuse|decline|negation
+refused|decline|negation
+refuses|decline|negation
+refusing|decline|negation
+excluded|ruleout|negation
+excludes|ruleout|negation
+ruled out|ruleout|negation
+rules out|ruleout|negation
+discontinuance|stop|negation
+discontinuation|stop|negation
+discontinue|stop|negation
+discontinued|stop|negation
+discontinues|stop|negation
+discontinuing|stop|negation
+quit|stop|negation
+quits|stop|negation
+quitting|stop|negation
+stop|stop|negation
+stopped|stop|negation
+stopping|stop|negation
+stops|stop|negation
+used to|stop|negation
+held|stop|negation
+hold|stop|negation
+holding|stop|negation
+holds|stop|negation
+on hold|stop|negation
+off|stop|negation
+elimination|stop|negation
+eliminate|stop|negation
+eliminated|stop|negation
+eliminates|stop|negation
+eliminating|stop|negation
+eradication|stop|negation
+eradicate|stop|negation
+eradicated|stop|negation
+eradicates|stop|negation
+eradicating|stop|negation
+removal|stop|negation
+remove|stop|negation
+removed|stop|negation
+removes|stop|negation
+removing|stop|negation
+purge|stop|negation
+purged|stop|negation
+purges|stop|negation
+purging|stop|negation
+wipe out|stop|negation
+wiped out|stop|negation
+wipes out|stop|negation
+wiping out|stop|negation
+rid|stop|negation
+interrupt|stop|negation
+interruption|stop|negation
+interruptions|stop|negation
+resolved|disappear|negation
+resolution|disappear|negation
+disappeared|disappear|negation
+disappearance|disappear|negation
+rather than|exception|negation
+instead of|exception|negation
+except|exception|negation
+exception|exception|negation
+besides|exception|negation
+aside from|exception|negation
+other than|exception|negation

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/not_patient_cue_list.psv
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/not_patient_cue_list.psv?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/not_patient_cue_list.psv (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/not_patient_cue_list.psv Fri Jan 25 18:29:49 2013
@@ -0,0 +1,56 @@
+mother|not_patient|not_patient
+father|not_patient|not_patient
+parent|not_patient|not_patient
+parents|not_patient|not_patient
+sister|not_patient|not_patient
+sisters|not_patient|not_patient
+brother|not_patient|not_patient
+brothers|not_patient|not_patient
+sibling|not_patient|not_patient
+siblings|not_patient|not_patient
+grandfather|not_patient|not_patient
+grandfathers|not_patient|not_patient
+grandmother|not_patient|not_patient
+grandmothers|not_patient|not_patient
+cousin|not_patient|not_patient
+cousins|not_patient|not_patient
+aunt|not_patient|not_patient
+aunts|not_patient|not_patient
+uncle|not_patient|not_patient
+uncles|not_patient|not_patient
+stepfather|not_patient|not_patient
+stepmother|not_patient|not_patient
+cousin|not_patient|not_patient
+cousins|not_patient|not_patient
+niece|not_patient|not_patient
+nieces|not_patient|not_patient
+nephew|not_patient|not_patient
+nephews|not_patient|not_patient
+wife|not_patient|not_patient
+husband|not_patient|not_patient
+spouse|not_patient|not_patient
+partner|not_patient|not_patient
+partners|not_patient|not_patient
+roommate|not_patient|not_patient
+roommates|not_patient|not_patient
+classmate|not_patient|not_patient
+classmates|not_patient|not_patient
+friend|not_patient|not_patient
+friends|not_patient|not_patient
+acquaintance|not_patient|not_patient
+acquaintances|not_patient|not_patient
+individual|not_patient|not_patient
+individuals|not_patient|not_patient
+person|not_patient|not_patient
+persons|not_patient|not_patient
+relative|not_patient|not_patient
+relatives|not_patient|not_patient
+family|not_patient|not_patient
+family member|not_patient|not_patient
+family members|not_patient|not_patient
+family history of |not_patient|not_patient
+fhx of|not_patient|not_patient
+fam hx of|not_patient|not_patient
+family hx|not_patient|not_patient
+twin|not_patient|not_patient
+familial|not_patient|not_patient

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/speculation.psv
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/speculation.psv?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/speculation.psv (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/doc/assertion_cue_phrase_index/psv/speculation.psv Fri Jan 25 18:29:49 2013
@@ -0,0 +1,194 @@
+uncertain|speculation_mid|speculation
+unsure|speculation_mid|speculation
+improbable|speculation_low|speculation
+unlikely|speculation_low|speculation
+doubtful|speculation_low|speculation
+doubt|speculation_low|speculation
+doubted|speculation_low|speculation
+doubting|speculation_low|speculation
+doubts|speculation_low|speculation
+possible|speculation_mid|speculation
+poss|speculation_mid|speculation
+hypothetical|speculation_mid|speculation
+questionable|speculation_mid|speculation
+consistent with|speculation_mid|speculation
+compatible with|speculation_mid|speculation
+equivocal for|speculation_mid|speculation
+suspicious for|speculation_mid|speculation
+potential|speculation_mid|speculation
+concerned for|speculation_mid|speculation
+concerning for|speculation_mid|speculation
+worrisome for|speculation_mid|speculation
+unclear|speculation_mid|speculation
+unknown|speculation_mid|speculation
+possibly|speculation_mid|speculation
+potentially|speculation_mid|speculation
+maybe|speculation_mid|speculation
+perhaps|speculation_mid|speculation
+possibilities|speculation_mid|speculation
+possibility|speculation_mid|speculation
+evidence for|speculation_mid|speculation
+evidence of|speculation_mid|speculation
+speculation|speculation_mid|speculation
+consideration|speculation_mid|speculation
+?|speculation_mid|speculation
+ddx|speculation_mid|speculation
+differential diagnosis|speculation_mid|speculation
+differential diagnoses|speculation_mid|speculation
+believe|speculation_high|speculation
+believed|speculation_high|speculation
+believes|speculation_high|speculation
+believing|speculation_high|speculation
+consider|speculation_mid|speculation
+considered|speculation_mid|speculation
+considering|speculation_mid|speculation
+considers|speculation_mid|speculation
+t/c|speculation_mid|speculation
+entertain|speculation_mid|speculation
+entertained|speculation_mid|speculation
+entertaining|speculation_mid|speculation
+entertains|speculation_mid|speculation
+question|speculation_mid|speculation
+questioned|speculation_mid|speculation
+questioning|speculation_mid|speculation
+questions|speculation_mid|speculation
+speculate|speculation_high|speculation
+speculated|speculation_high|speculation
+speculates|speculation_high|speculation
+speculating|speculation_high|speculation
+can|speculation_mid|speculation
+could|speculation_mid|speculation
+may|speculation_mid|speculation
+might|speculation_mid|speculation
+whether or not|speculation_mid|speculation
+whether|speculation_mid|speculation
+either|speculation_mid|speculation
+or|speculation_mid|speculation
+if|speculation_mid|speculation
+unless|speculation_low|speculation
+probable|speculation_high|speculation
+likely|speculation_high|speculation
+presumed|speculation_high|speculation
+presumptive|speculation_high|speculation
+apparent|speculation_high|speculation
+preliminary|speculation_high|speculation
+putative|speculation_high|speculation
+indicative of|speculation_high|speculation
+suggestive of|speculation_high|speculation
+typical of|speculation_high|speculation
+probably|speculation_high|speculation
+likely|speculation_high|speculation
+presumably|speculation_high|speculation
+apparently|speculation_high|speculation
+reportedly|speculation_high|speculation
+seemingly|speculation_high|speculation
+supposedly|speculation_high|speculation
+must|speculation_high|speculation
+appear|speculation_high|speculation
+appeared|speculation_high|speculation
+appearing|speculation_high|speculation
+appears|speculation_high|speculation
+seem|speculation_high|speculation
+seemed|speculation_high|speculation
+seems|speculation_high|speculation
+seeming|speculation_high|speculation
+estimate|speculation_high|speculation
+estimated|speculation_high|speculation
+estimates|speculation_high|speculation
+estimating|speculation_high|speculation
+feel that|speculation_high|speculation
+feeling that|speculation_high|speculation
+feels that|speculation_high|speculation
+felt that|speculation_high|speculation
+hypothesize|speculation_high|speculation
+hypothesized|speculation_high|speculation
+hypothesizes|speculation_high|speculation
+hypothesizing|speculation_high|speculation
+implied|speculation_high|speculation
+implies|speculation_high|speculation
+imply|speculation_high|speculation
+implying|speculation_high|speculation
+indicate|speculation_high|speculation
+indicated|speculation_high|speculation
+indicates|speculation_high|speculation
+indicating|speculation_high|speculation
+postulate|speculation_high|speculation
+postulated|speculation_high|speculation
+postulating|speculation_high|speculation
+postulates|speculation_high|speculation
+propose|speculation_high|speculation
+proposed|speculation_high|speculation
+proposes|speculation_high|speculation
+proposing|speculation_high|speculation
+suggest|speculation_high|speculation
+suggested|speculation_high|speculation
+suggesting|speculation_high|speculation
+suggests|speculation_high|speculation
+support|speculation_high|speculation
+supported|speculation_high|speculation
+supporting|speculation_high|speculation
+supports|speculation_high|speculation
+suppose|speculation_high|speculation
+supposed|speculation_high|speculation
+supposes|speculation_high|speculation
+supposing|speculation_high|speculation
+suspect|speculation_high|speculation
+suspected|speculation_high|speculation
+suspecting|speculation_high|speculation
+suspects|speculation_high|speculation
+think|speculation_high|speculation
+thinks|speculation_high|speculation
+thinking|speculation_high|speculation
+thought|speculation_high|speculation
+concern for|speculation_high|speculation
+concerns for|speculation_high|speculation
+implication|speculation_high|speculation
+implications|speculation_high|speculation
+impression of|speculation_high|speculation
+indication of|speculation_high|speculation
+indications of|speculation_high|speculation
+suspicion|speculation_high|speculation
+suspicions|speculation_high|speculation
+suggestion|speculation_high|speculation
+suggestions|speculation_high|speculation
+hypothesis|speculation_high|speculation
+hypotheses|speculation_high|speculation
+probability|speculation_high|speculation
+probabilities|speculation_high|speculation
+sign of|speculation_high|speculation
+signs of|speculation_high|speculation
+v.s.|speculation_high|speculation
+versus|speculation_high|speculation
+vs.|speculation_high|speculation
+vs|speculation_high|speculation
+r / o|speculation_mid|speculation
+ro|speculation_mid|speculation
+r/o|speculation_mid|speculation
+rule out|speculation_mid|speculation
+ruling out|speculation_mid|speculation
+rule - out|speculation_mid|speculation
+ruling - out|speculation_mid|speculation
+test for|speculation_mid|speculation
+tested for|speculation_mid|speculation
+testing for|speculation_mid|speculation
+tests for|speculation_mid|speculation
+check for|speculation_mid|speculation
+checked for|speculation_mid|speculation
+checking for|speculation_mid|speculation
+checks for|speculation_mid|speculation
+evaluate for|speculation_mid|speculation
+eval for|speculation_mid|speculation
+evaluated for|speculation_mid|speculation
+evaluates for|speculation_mid|speculation
+evaluating for|speculation_mid|speculation
+evaluation for|speculation_mid|speculation
+assess for|speculation_mid|speculation
+assessment for|speculation_mid|speculation
+monitor for|speculation_mid|speculation
+monitored for|speculation_mid|speculation
+monitoring for|speculation_mid|speculation
+monitors for|speculation_mid|speculation
+serologies for|speculation_mid|speculation
+serology for|speculation_mid|speculation
+work up for|speculation_mid|speculation
+workup for|speculation_mid|speculation

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/pom.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/pom.xml?rev=1438632&r1=1438631&r2=1438632&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/pom.xml (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/pom.xml Fri Jan 25 18:29:49 2013
@@ -87,6 +87,7 @@
 						</goals>
 						<configuration>
 							<includeArtifactIds>ctakes-resources-umls2011ab</includeArtifactIds>
+              <!-- <includes>**/*</includes> -->
 							<outputDirectory>${project.build.directory}/classes</outputDirectory>
 						</configuration>
 					</execution>

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/CreateAssertionLuceneIndexFromDelimitedFile.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/CreateAssertionLuceneIndexFromDelimitedFile.java?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/CreateAssertionLuceneIndexFromDelimitedFile.java (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/CreateAssertionLuceneIndexFromDelimitedFile.java Fri Jan 25 18:29:49 2013
@@ -0,0 +1,448 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.dictionary.assertion;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Version;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+
+import org.apache.ctakes.core.nlp.tokenizer.OffsetComparator;
+import org.apache.ctakes.core.nlp.tokenizer.Token;
+import org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB;
+
+/**
+ * Driver for populating a Lucene Index with assertion cue phrases, so that the
+ * tokenization of the dictionary entries matches the tokenization that will be
+ * done to clinical text during pipeline processing. Just as the pipeline can
+ * use a file of hyphenated words to control which words should be considered
+ * as a single token, the creation of the dictionary entries can use a file of
+ * hyphenated words so the dictionary entries are tokenized in the same way as
+ * the clinical text will be.
+ */
+public class CreateAssertionLuceneIndexFromDelimitedFile {
+	private static TokenizerPTB tokenizer = new TokenizerPTB();
+
+	// The path to a directory containing one or more pipe-delimited files
+  // A new directory "assertion_cue_phrase_index" will be created in the
+  // parent. This new directory will be the lucene index directory.
+	private static String directoryOfDelimitedFiles = null;
+	// directoryOfDelimitedFiles =
+	// "/temp/pipe-delimited-dictionary-data/RxNorm";
+
+	private IndexWriter iwriter = null;
+
+	private int idCount = 0;
+
+	private final String ID = "UNIQUE_DOCUMENT_IDENTIFIER_FIELD";
+
+	private final String rxNormCode = "codeRxNorm";
+	private final String Code = "code";
+	private final String CodeToken = "codeTokenized";
+	private final String FirstWord = "first_word";
+	private final String OtherDesig = "other_designation";
+	private final String PreferDesig = "preferred_designation";
+	
+	public static final String CUE_PHRASE_FIELD_NAME = "cuePhrase";
+	public static final String CUE_PHRASE_CATEGORY_FIELD_NAME = "cuePhraseCategory";
+  public static final String CUE_PHRASE_FAMILY_FIELD_NAME = "cuePhraseFamily";
+  public static final String CUE_PHRASE_FIRST_WORD_FIELD_NAME = "cuePhraseFirstWord";
+	
+
+	/**
+	 * Constructor
+	 * 
+	 * @param Tokenizer
+	 *            Used to tokenize the dictionary entries
+	 */
+	public CreateAssertionLuceneIndexFromDelimitedFile(TokenizerPTB tokenizer)
+			throws Exception {
+		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
+		String defaultLoc = new File(directoryOfDelimitedFiles)
+				.getAbsolutePath();
+		boolean error = false;
+		long numEntries = 0;
+		try {
+			Directory directory = FSDirectory.open(new File(
+					new File(defaultLoc).getParent() + "/assertion_cue_phrase_index"));
+
+			IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
+			iwriter = new IndexWriter(directory, indexWriterConfig);
+//      iwriter = new IndexWriter(directory, analyzer, true,
+//          IndexWriter.MaxFieldLength.LIMITED);
+			// Process multiple files in directory
+
+			File file = new File(defaultLoc);
+			if (file.isDirectory()) {
+				String[] processFiles = file.list();
+				for (int i = 0; i < processFiles.length; i++) {
+					System.out.println("Process Each File in " + file.getName()
+							+ "...");
+					File nextFile = new File(directoryOfDelimitedFiles + "/"
+							+ processFiles[i]);
+
+					BufferedReader br = new BufferedReader(new FileReader(
+							nextFile));
+					String record = "";
+					while ((record = br.readLine()) != null) {
+						// System.out.println(" record so far out of " + record
+						// );
+					  
+					  String splitRecord[] = record.split("\\|");
+					  if (splitRecord.length == 0)
+					  { continue; }
+					  String cuePhrase = splitRecord[0];
+					  String cuePhraseCategory = "default_category";
+					  String cuePhraseFamily = "default_family";
+					  if (splitRecord.length >= 2)
+					  {
+					    cuePhraseCategory = splitRecord[1];
+					    cuePhraseFamily   = splitRecord[2];
+					    if (cuePhraseCategory == null || cuePhraseCategory.isEmpty())
+					    {
+					      cuePhraseCategory = "category__" + cuePhraseFamily;
+					    }
+					  }
+
+//						String ssubstring = cueWordCategory.substring(cueWordCategory
+//								.indexOf('|') + 1);
+//						String source = ssubstring.substring(0,
+//								ssubstring.indexOf('|'));
+//
+//						String tsubstring = ssubstring.substring(ssubstring
+//								.indexOf('|') + 1);
+//						String codeFromSource = tsubstring.substring(0,
+//								tsubstring.indexOf('|'));
+//
+//						String usubstring = tsubstring.substring(tsubstring
+//								.indexOf('|') + 1);
+//						String isPreferred = usubstring.substring(0,
+//								usubstring.indexOf('|'));
+//
+//						String semIds = usubstring.substring(usubstring
+//								.indexOf('|') + 1);
+						// System.out.println(" " + cui +
+						// " processed so far out of " +
+						// propertyValue + " -- " + sourceCC +" ispref "
+						// +isPreferred +
+						// " semIds " +semIds);
+//						writeToFormatLucene(cueWord, propertyValue, source,
+//								codeFromSource, isPreferred, semIds);
+            writeToFormatLucene(cuePhrase, cuePhraseCategory, cuePhraseFamily);
+						numEntries++;
+					}
+				}
+			}
+		} catch (IOException io) {
+			System.out.println("IO exception caught");
+			error = true;
+		} finally {
+			try {
+				iwriter.maybeMerge();
+				iwriter.close();
+				if (!error) {
+					System.out.println("Index created with " + numEntries
+							+ " entries.");
+				}
+			} catch (IOException io) {
+				System.out.println("IO exception caught");
+			}
+		}
+	}
+
+	public static void main(String[] args) {
+		System.gc();
+
+		if (args.length == 1) { // If no file of hyphenated words given
+			try {
+				directoryOfDelimitedFiles = args[0];
+				tokenizer = new TokenizerPTB();
+				new CreateAssertionLuceneIndexFromDelimitedFile(tokenizer);
+			} catch (Exception e) {
+				e.printStackTrace();
+			}
+		} else if (args.length == 3) { // else, use the file of hyphenated words
+										// during tokenization
+			try {
+
+				directoryOfDelimitedFiles = args[0];
+				// ** hyphnated file no longer needed. using the new PTB
+				// tokenizer instead. **
+				// String hyphFileLoc = args[1];
+				// int freqCutoff = Integer.parseInt(args[2]);
+				// Map hyphMap = loadHyphMap(hyphFileLoc);
+				// System.out.println("Processing hyphMap from : " +
+				// hyphFileLoc);
+
+				tokenizer = new TokenizerPTB();
+				new CreateAssertionLuceneIndexFromDelimitedFile(tokenizer);
+			} catch (Exception e) {
+				e.printStackTrace();
+			}
+		} else {
+			System.out.println(getUsage());
+		}
+
+	}
+
+	/**
+	 * Loads text from a file.
+	 * 
+	 * @param filename
+	 * @return
+	 * @throws FileNotFoundException
+	 * @throws IOException
+	 */
+	public static String load(String filename) throws FileNotFoundException,
+			IOException {
+		String msg = "";
+		File f = new File(filename);
+		BufferedReader br = new BufferedReader(new FileReader(f));
+		String line = br.readLine();
+		while (line != null) {
+			msg += line + "\n";
+			line = br.readLine();
+		}
+		br.close();
+
+		return msg;
+	}
+
+	/**
+	 * Loads hyphenated words and a frequency value for each, from a file.
+	 * 
+	 * @param filename
+	 * @return
+	 * @throws FileNotFoundException
+	 * @throws IOException
+	 */
+	public static Map loadHyphMap(String filename)
+			throws FileNotFoundException, IOException {
+		Map hyphMap = new HashMap();
+		File f = new File(filename);
+		BufferedReader br = new BufferedReader(new FileReader(f));
+		String line = br.readLine();
+		while (line != null) {
+			StringTokenizer st = new StringTokenizer(line, "|");
+			if (st.countTokens() == 2) {
+				String hyphWord = st.nextToken();
+				Integer freq = new Integer(st.nextToken());
+				hyphMap.put(hyphWord.toLowerCase(), freq);
+			} else {
+				System.out.println("Invalid hyphen file line: " + line);
+			}
+			line = br.readLine();
+		}
+		br.close();
+
+		return hyphMap;
+	}
+
+	/**
+	 * Prints out the tokenized results, for debug use.
+	 * 
+	 * @param text
+	 * @param results
+	 */
+	public static void printResults(String text, List results) {
+		System.out.println("Text: " + text);
+		for (int i = 0; i < results.size(); i++) {
+			Token t = (Token) results.get(i);
+			String typeStr = "";
+			switch (t.getType()) {
+			case Token.TYPE_WORD:
+				typeStr = "word       ";
+				break;
+			case Token.TYPE_PUNCT:
+				typeStr = "punctuation";
+				break;
+			case Token.TYPE_NUMBER:
+				typeStr = "number     ";
+				break;
+			case Token.TYPE_EOL:
+				typeStr = "end of line";
+				break;
+			case Token.TYPE_CONTRACTION:
+				typeStr = "contraction";
+				break;
+			case Token.TYPE_SYMBOL:
+				typeStr = "symbol     ";
+				break;
+			default:
+				typeStr = "unknown    ";
+			}
+
+			String capsStr = "";
+			switch (t.getCaps()) {
+			case Token.CAPS_ALL:
+				capsStr = "A";
+				break;
+			case Token.CAPS_NONE:
+				capsStr = "N";
+				break;
+			case Token.CAPS_MIXED:
+				capsStr = "M";
+				break;
+			case Token.CAPS_FIRST_ONLY:
+				capsStr = "F";
+				break;
+			default:
+				capsStr = "?";
+			}
+
+			String numPosStr = "";
+			switch (t.getNumPosition()) {
+			case Token.NUM_FIRST:
+				numPosStr = "F";
+				break;
+			case Token.NUM_MIDDLE:
+				numPosStr = "M";
+				break;
+			case Token.NUM_LAST:
+				numPosStr = "L";
+				break;
+			case Token.NUM_NONE:
+				numPosStr = "N";
+				break;
+			default:
+				numPosStr = "?";
+			}
+
+			String intStr = "";
+			if (t.isInteger()) {
+				intStr = "Y";
+			} else {
+				intStr = "N";
+			}
+
+			System.out.println("Token:" + " type=[" + typeStr + "]" + " caps=["
+					+ capsStr + "]" + " npos=[" + numPosStr + "]" + " int=["
+					+ intStr + "]" + " offsets=[" + t.getStartOffset() + ","
+					+ t.getEndOffset() + "]" + "\t\t" + "text=["
+					+ text.substring(t.getStartOffset(), t.getEndOffset())
+					+ "]");
+		}
+	}
+
+	/**
+	 * @return A string showing usage example (parameters)
+	 */
+	public static String getUsage() {
+		return "java LucenePopulateDriver <dir-containing-textfile(s)> [hyphenfile] [freqcutoff]";
+	}
+
+	protected void writeToFormatLucene(String cuePhrase, String cuePhraseCategory, String cuePhraseFamily) {
+
+		Document doc = new Document();
+
+		try {
+
+			// Print the name out
+
+			idCount++;
+			//if (idCount % 10000 == 0)
+				System.out.println(" " + idCount
+						+ " processed so far out of total");
+				
+			doc.add(new TextField("cuePhrase", cuePhrase, Field.Store.YES));
+			doc.add(new StringField("cuePhraseCategory", cuePhraseCategory, Field.Store.YES));
+			doc.add(new StringField("cuePhraseFamily", cuePhraseFamily, Field.Store.YES));
+			
+
+			List list = tokenizer.tokenize(cuePhrase);
+			Collections.sort(list, new OffsetComparator());
+
+			Iterator tokenItr = list.iterator();
+			Token t;
+			int tCount = 0;
+			String firstTokenText = "";
+			String tokenizedCuePhrase = "";
+
+			while (tokenItr.hasNext()) {
+				tCount++;
+				t = (Token) tokenItr.next();
+				if (tCount == 1) {
+					firstTokenText = t.getText(); // first token (aka
+													// "first word")
+					tokenizedCuePhrase += t.getText();
+				} else { // use blank to separate tokens
+					tokenizedCuePhrase = tokenizedCuePhrase + " " + t.getText();
+				}
+
+			}
+
+			doc.add(new StringField(CUE_PHRASE_FIRST_WORD_FIELD_NAME, firstTokenText, Field.Store.YES));
+
+			iwriter.addDocument(doc);
+
+			//String data = cui + "|" + firstTokenText + "|" + tokenizedDesc + "|" + codeInSource + "|" + source + "|" + semId + '\n';
+			String data = cuePhrase + "|" + cuePhraseCategory + "|" + tokenizedCuePhrase + '\n';
+			writeToFile (data);
+
+		} catch (IOException io) {
+			System.out.println("IOException in document : io "
+					+ io.getLocalizedMessage());
+
+		} catch (Exception exc) {
+			System.out.println("Exception in document : exc "
+					+ exc.getLocalizedMessage());
+		}
+
+		// writeToOutPutFile(cui + "|" + desc + "|" + source + "|" + cc + "|" +
+		// termStatus + "|" + semId);
+	}
+
+	public void writeToFile(String str) {
+		try {
+			// Create the output file of sample.txt
+			FileWriter fstream = new FileWriter(
+					"sample.txt",
+					true);
+
+			// Write data into the file
+			BufferedWriter out = new BufferedWriter(fstream);
+
+			out.write(str);
+			out.close();
+
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+}

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/package-info.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/package-info.java?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/package-info.java (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/assertion/package-info.java Fri Jan 25 18:29:49 2013
@@ -0,0 +1,8 @@
+/**
+ * Command-line tooling for building the Lucene index of assertion cue
+ * phrases from pipe-delimited source files; see
+ * {@link org.apache.ctakes.dictionary.assertion.CreateAssertionLuceneIndexFromDelimitedFile}.
+ *
+ * @author mcoarr
+ */
+package org.apache.ctakes.dictionary.assertion;
\ No newline at end of file

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/AssertionCuePhraseConsumerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/AssertionCuePhraseConsumerImpl.java?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/AssertionCuePhraseConsumerImpl.java (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/AssertionCuePhraseConsumerImpl.java Fri Jan 25 18:29:49 2013
@@ -0,0 +1,219 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.dictionary.lookup.ae;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Properties;
+
+import org.apache.ctakes.core.resource.LuceneIndexReaderResource;
+import org.apache.ctakes.dictionary.lookup.MetaDataHit;
+import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
+import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
+import org.apache.log4j.Logger;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+
+/**
+ * Implementation that takes Rxnorm dictionary lookup hits and stores only the
+ * ones that are also present in the Orange Book.
+ * 
+ * @author Mayo Clinic
+ */
+public class AssertionCuePhraseConsumerImpl extends BaseLookupConsumerImpl
+		implements LookupConsumer
+{
+  public static final String CUE_PHRASE_FIELD_NAME = "cuePhrase";
+  public static final String CUE_PHRASE_CATEGORY_FIELD_NAME = "cuePhraseCategory";
+  public static final String CUE_PHRASE_FAMILY_FIELD_NAME = "cuePhraseFamily";
+  public static final String CUE_PHRASE_FIRST_WORD_FIELD_NAME = "cuePhraseFirstWord";
+
+	// LOG4J logger based on class name
+	private Logger iv_logger = Logger.getLogger(getClass().getName());
+
+	private final String CODE_MF_PRP_KEY = "codeMetaField";
+
+	private final String CODING_SCHEME_PRP_KEY = "codingScheme";
+
+	private final String LUCENE_FILTER_RESRC_KEY_PRP_KEY = "luceneFilterExtResrcKey";
+
+	private Properties iv_props;
+
+	private IndexSearcher iv_searcher;
+	//ohnlp-Bugs-3296301 limits the search results to fixed 100 records.
+	// Added 'MaxListSize'
+	private int iv_maxHits;
+
+	public AssertionCuePhraseConsumerImpl(UimaContext aCtx, Properties props, int maxListSize)
+			throws Exception
+	{
+		// TODO property validation could be done here
+		iv_props = props;
+		iv_maxHits = maxListSize;
+		String resrcName = iv_props.getProperty(LUCENE_FILTER_RESRC_KEY_PRP_KEY);
+		LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject(resrcName);
+		iv_searcher = new IndexSearcher(resrc.getIndexReader());
+	}
+	public AssertionCuePhraseConsumerImpl(UimaContext aCtx, Properties props)
+	throws Exception
+	{
+		// TODO property validation could be done here
+		iv_props = props;
+		String resrcName = iv_props.getProperty(LUCENE_FILTER_RESRC_KEY_PRP_KEY);
+		LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject(resrcName);
+		iv_searcher = new IndexSearcher(resrc.getIndexReader());
+		iv_maxHits = Integer.MAX_VALUE;
+	}
+	public void consumeHits(JCas jcas, Iterator lhItr)
+			throws AnalysisEngineProcessException
+	{
+		Iterator hitsByOffsetItr = organizeByOffset(lhItr);
+		while (hitsByOffsetItr.hasNext())
+		{
+			Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next();
+
+			// iterate over the LookupHit objects
+			// code is only valid if the covered text is also present in the
+			// filter
+			Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator();
+			int neBegin = -1;
+			int neEnd = -1;
+			Collection validCodeCol = new HashSet();
+			while (lhAtOffsetItr.hasNext())
+			{
+				LookupHit lh = (LookupHit) lhAtOffsetItr.next();
+				neBegin = lh.getStartOffset();
+				neEnd = lh.getEndOffset();
+
+				String text = jcas.getDocumentText().substring(
+						lh.getStartOffset(),
+						lh.getEndOffset());
+				text = text.trim().toLowerCase();
+
+				MetaDataHit mdh = lh.getDictMetaDataHit();
+				String cuePhrase = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_FIELD_NAME);
+				String cuePhraseFirstWord = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_FIRST_WORD_FIELD_NAME);
+				String cuePhraseCategory = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_CATEGORY_FIELD_NAME);
+				String cuePhraseFamily = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_FAMILY_FIELD_NAME);
+				//String cuePhraseAssertionFamily = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_ASSERTION_FAMILY_FIELD_NAME);
+				
+//				String code = mdh.getMetaFieldValue(iv_props.getProperty(CODE_MF_PRP_KEY));
+//
+//				if (isValid("trade_name", text) || isValid("ingredient", text))
+//				{
+//					validCodeCol.add(code);
+//				}
+//				else
+//				{
+//					iv_logger.warn("Filtered out: "+text);
+//				}
+
+	      AssertionCuePhraseAnnotation cuePhraseAnnotation = new AssertionCuePhraseAnnotation(jcas);
+	      cuePhraseAnnotation.setBegin(neBegin);
+	      cuePhraseAnnotation.setEnd(neEnd);
+	      
+	      cuePhraseAnnotation.setCuePhrase(cuePhrase);
+	      cuePhraseAnnotation.setCuePhraseFirstWord(cuePhraseFirstWord);
+	      
+	      cuePhraseAnnotation.setCuePhraseCategory(cuePhraseCategory);
+	      cuePhraseAnnotation.setCuePhraseAssertionFamily(cuePhraseFamily);
+	      
+	      cuePhraseAnnotation.addToIndexes();
+
+			}
+			
+
+
+//			if (validCodeCol.size() > 0)
+//			{
+//				FSArray ocArr = createOntologyConceptArr(jcas, validCodeCol);
+//				IdentifiedAnnotation neAnnot = new MedicationEventMention(jcas); // medication NEs are EventMention
+//				neAnnot.setTypeID(CONST.NE_TYPE_ID_DRUG);
+//				neAnnot.setBegin(neBegin);
+//				neAnnot.setEnd(neEnd);
+//				neAnnot.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_DICT_LOOKUP);
+//				neAnnot.setOntologyConceptArr(ocArr);
+//				neAnnot.addToIndexes();
+//			}
+		}
+	}
+
+	/**
+	 * For each valid code, a corresponding JCas OntologyConcept object is
+	 * created and stored in a FSArray.
+	 * 
+	 * @param jcas
+	 * @param validCodeCol
+	 * @return
+	 */
+	private FSArray createOntologyConceptArr(JCas jcas, Collection validCodeCol)
+	{
+		FSArray ocArr = new FSArray(jcas, validCodeCol.size());
+		int ocArrIdx = 0;
+		Iterator validCodeItr = validCodeCol.iterator();
+		while (validCodeItr.hasNext())
+		{
+			String validCode = (String) validCodeItr.next();
+			OntologyConcept oc = new OntologyConcept(jcas);
+			oc.setCode(validCode);
+			oc.setCodingScheme(iv_props.getProperty(CODING_SCHEME_PRP_KEY));
+
+			ocArr.set(ocArrIdx, oc);
+			ocArrIdx++;
+		}
+		return ocArr;
+	}
+
+	private boolean isValid(String fieldName, String str)
+			throws AnalysisEngineProcessException
+	{
+		try
+		{
+			Query q = new TermQuery(new Term(fieldName, str));
+
+            TopDocs topDoc = iv_searcher.search(q, iv_maxHits);
+            ScoreDoc[] hits = topDoc.scoreDocs;
+            if ((hits != null) && (hits.length > 0))
+            {
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+		}
+		catch (Exception e)
+		{
+			throw new AnalysisEngineProcessException(e);
+		}
+	}
+}
\ No newline at end of file

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml Fri Jan 25 18:29:49 2013
@@ -0,0 +1,117 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <!-- Uses the generic DictionaryLookupAnnotator class; what makes this descriptor
+       specific to assertion cue phrases is the set of external resources bound below. -->
+  <annotatorImplementationName>org.apache.ctakes.dictionary.lookup.ae.DictionaryLookupAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>AssertionCuePhraseDictionaryLookupAnnotator</name>
+    <description/>
+    <version/>
+    <vendor/>
+    <configurationParameters>
+      <configurationParameter>
+        <name>maxListSize</name>
+        <description>Specifies the maximum number of items to be returned from an lucene query.</description>
+        <type>Integer</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>maxListSize</name>
+        <value>
+          <!-- 2147483647 is the largest 32-bit signed integer, i.e. effectively no limit -->
+          <integer>2147483647</integer>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription/>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation</type>
+        </inputs>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <!-- NOTE(review): only AssertionCuePhraseIndexReader is declared as a dependency here,
+       while the bindings at the end of this file also bind the key LookupDescriptor.
+       Confirm whether a matching dependency declaration for LookupDescriptor is needed. -->
+  <externalResourceDependencies>
+    <externalResourceDependency>
+      <key>AssertionCuePhraseIndexReader</key>
+      <description/>
+      <interfaceName>org.apache.ctakes.core.resource.LuceneIndexReaderResource</interfaceName>
+      <optional>false</optional>
+    </externalResourceDependency>
+  </externalResourceDependencies>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <!-- Lucene index of assertion cue phrases, read through LuceneIndexReaderResourceImpl -->
+      <externalResource>
+        <name>AssertionCuePhraseIndex</name>
+        <description/>
+        <configurableDataResourceSpecifier>
+          <url/>
+          <resourceMetaData>
+            <name/>
+            <configurationParameters>
+              <configurationParameter>
+                <name>UseMemoryIndex</name>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+              </configurationParameter>
+              <configurationParameter>
+                <name>IndexDirectory</name>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+              </configurationParameter>
+            </configurationParameters>
+            <configurationParameterSettings>
+              <nameValuePair>
+                <name>UseMemoryIndex</name>
+                <value>
+                  <!-- presumably makes the resource load the index into memory; confirm in LuceneIndexReaderResourceImpl -->
+                  <boolean>true</boolean>
+                </value>
+              </nameValuePair>
+              <nameValuePair>
+                <name>IndexDirectory</name>
+                <value>
+                  <!-- relative path of the index directory; the last segment matches the directory
+                       name written by CreateAssertionLuceneIndexFromDelimitedFile -->
+                  <string>org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index</string>
+                </value>
+              </nameValuePair>
+            </configurationParameterSettings>
+          </resourceMetaData>
+        </configurableDataResourceSpecifier>
+        <implementationName>org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl</implementationName>
+      </externalResource>
+      <!-- Lookup specification file that defines the dictionaries used by the annotator -->
+      <externalResource>
+        <name>LookupDescriptorFile</name>
+        <description/>
+        <fileResourceSpecifier>
+          <fileUrl>file:org/apache/ctakes/dictionary/lookup/AssertionLookupDesc.xml</fileUrl>
+        </fileResourceSpecifier>
+        <implementationName>org.apache.ctakes.core.resource.FileResourceImpl</implementationName>
+      </externalResource>
+    </externalResources>
+    <!-- Maps the keys used by the annotator code to the resources defined above -->
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>LookupDescriptor</key>
+        <resourceName>LookupDescriptorFile</resourceName>
+      </externalResourceBinding>
+      <externalResourceBinding>
+        <key>AssertionCuePhraseIndexReader</key>
+        <resourceName>AssertionCuePhraseIndex</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
+</taeDescription>

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionLookupDesc.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionLookupDesc.xml?rev=1438632&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionLookupDesc.xml (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/AssertionLookupDesc.xml Fri Jan 25 18:29:49 2013
@@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<lookupSpecification>
+	<!--  Defines what dictionaries will be used in terms of implementation specifics and metaField configuration. -->
+	<dictionaries>
+	
+	  <!--
+		<dictionary id="DICT_UMLS_MS" externalResourceKey="UmlsIndexReader" caseSensitive="false">
+			<implementation>
+				<luceneImpl/>
+			</implementation>
+			<lookupField fieldName="first_word"/>
+			<metaFields>
+				<metaField fieldName="cui"/>
+				<metaField fieldName="tui"/>
+				<metaField fieldName="text"/>
+			</metaFields>
+		</dictionary>
+		-->
+	
+	  <!--
+		<dictionary id="DICT_RXNORM" externalResourceKey="RxnormIndexReader" caseSensitive="false">
+			<implementation>
+				<luceneImpl/>
+			</implementation>
+			<lookupField fieldName="first_word"/>
+			<metaFields>
+				<metaField fieldName="code"/>
+				<metaField fieldName="preferred_designation"/>
+				<metaField fieldName="other_designation"/>
+			</metaFields>
+		</dictionary>
+		-->
+    <dictionary id="DICT_ASSERTION_CUE_PHRASE" externalResourceKey="AssertionCuePhraseIndexReader" caseSensitive="false">
+      <implementation> <!-- Lucene-backed dictionary for assertion cue phrases; the lookupBinding further down refers to it by idRef. -->
+        <luceneImpl/>
+      </implementation>
+      <lookupField fieldName="cuePhrase"/> <!-- NOTE(review): presumably the index field that lookups are matched against; confirm against the index builder. -->
+      <metaFields> <!-- NOTE(review): presumably the stored fields made available with each hit; confirm against the index builder. -->
+        <metaField fieldName="cuePhrase"/>
+        <metaField fieldName="cuePhraseCategory"/>
+        <metaField fieldName="assertionCuePhraseFamily"/>
+      </metaFields>
+    </dictionary>
+
+	
+	</dictionaries>
+	<!-- Binds together the components necessary to perform the complete lookup logic start to end. -->
+	<lookupBindings>
+	
+	  <!--
+		<lookupBinding>
+			<dictionaryRef idRef="DICT_UMLS_MS"/>
+			<lookupInitializer className="org.apache.ctakes.dictionary.lookup.ae.FirstTokenPermLookupInitializerImpl">
+				<properties>
+					<property key="textMetaFields" value="text"/>
+					<property key="maxPermutationLevel" value="5"/>
+					<!- -	<property key="windowAnnotations" value="org.apache.ctakes.typesystem.type.textspan.Sentence"/> - ->
+					<property key="windowAnnotations" value="org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation"/>  
+					<property key="exclusionTags" value="VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,LS,MD,PDT,POS,PP,PP$,PRP,PRP$,RP,TO,WDT,WP,WPS,WRB"/>
+				</properties>
+			</lookupInitializer>
+			<lookupConsumer className="org.apache.ctakes.dictionary.lookup.ae.UmlsToSnomedLuceneConsumerImpl">
+				<properties>
+					<property key="codingScheme" value="SNOMED"/>
+					<property key="cuiMetaField" value="cui"/>
+					<property key="tuiMetaField" value="tui"/>
+					<property key="cuiMappingField" value="cui"/>
+					<property key="snomedCodeMappingField" value="code"/>
+					<!- - the following values are used as TUIs for testing: T_AS, T_PR, T_DD, T_SS - ->
+					<property key="anatomicalSiteTuis" value="T021,T022,T023,T024,T025,T026,T029,T030,T_AS"/>
+					<property key="procedureTuis" value="T059,T060,T061,T_PR"/>
+					<property key="disorderTuis" value="T019,T020,T037,T046,T047,T048,T049,T050,T190,T191,T_DD"/>
+					<property key="findingTuis" value="T033,T034,T040,T041,T042,T043,T044,T045,T046,T056,T057,T184,T_SS"/>
+					<property key="luceneFilterExtResrcKey" value="UmlsIndexReader"/>
+				</properties>
+			</lookupConsumer>
+		</lookupBinding>
+		-->
+	
+	  <!--
+		<lookupBinding>
+			<dictionaryRef idRef="DICT_RXNORM"/>
+			<lookupInitializer className="org.apache.ctakes.dictionary.lookup.ae.FirstTokenPermLookupInitializerImpl">
+				<properties>
+					<property key="textMetaFields" value="preferred_designation|other_designation"/>
+					<property key="maxPermutationLevel" value="7"/>
+					<!- -	<property key="windowAnnotations" value="org.apache.ctakes.typesystem.type.textspan.Sentence"/> - ->
+					<property key="windowAnnotations" value="org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation"/>  
+					<property key="exclusionTags" value="VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,LS,MD,PDT,POS,PP,PP$,RP,TO,WDT,WP,WPS,WRB"/>
+				</properties>
+			</lookupInitializer>
+			<lookupConsumer className="org.apache.ctakes.dictionary.lookup.ae.OrangeBookFilterConsumerImpl">
+				<properties>
+					<property key="codingScheme" value="RXNORM"/>
+					<property key="codeMetaField" value="code"/>
+					<property key="luceneFilterExtResrcKey" value="OrangeBookIndexReader"/>
+				</properties>
+			</lookupConsumer>
+		</lookupBinding>
+		-->
+		
+    <lookupBinding>
+      <dictionaryRef idRef="DICT_ASSERTION_CUE_PHRASE"/> <!-- Binds the assertion cue phrase dictionary to the initializer and consumer configured below. -->
+      <!-- Initializer in use: org.apache.ctakes.dictionary.lookup.ae.DirectLookupInitializerImpl -->
+      <!-- Disabled alternative (kept commented out further down): org.apache.ctakes.dictionary.lookup.ae.FirstTokenPermLookupInitializerImpl -->
+      <lookupInitializer className="org.apache.ctakes.dictionary.lookup.ae.DirectLookupInitializerImpl">
+        <properties>
+          <property key="textMetaFields" value="cuePhraseCategory"/> <!-- NOTE(review): the dictionary's lookupField is cuePhrase, yet this names cuePhraseCategory; confirm this is intended. -->
+          <!--  <property key="maxPermutationLevel" value="7"/>  -->
+          <!--  <property key="windowAnnotations" value="org.apache.ctakes.typesystem.type.textspan.Sentence"/> -->
+          <!--  windowAnnotations candidate (in use below): org.apache.ctakes.typesystem.type.syntax.Chunk -->
+          <!--  windowAnnotations candidate (not used here): org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation -->
+          <property key="windowAnnotations" value="org.apache.ctakes.typesystem.type.syntax.Chunk"/>  
+          <!--  <property key="exclusionTags" value="VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,LS,MD,PDT,POS,PP,PP$,RP,TO,WDT,WP,WPS,WRB"/> -->
+        </properties>
+      </lookupInitializer>
+<!-- Disabled alternative initializer; nested comments are written as "<!- -" and "- ->" so that this outer comment stays well-formed.
+      <lookupInitializer className="org.apache.ctakes.dictionary.lookup.ae.FirstTokenPermLookupInitializerImpl">
+        <properties>
+          <property key="textMetaFields" value="cuePhraseCategory"/>
+          <property key="maxPermutationLevel" value="7"/>
+          <!- -  <property key="windowAnnotations" value="org.apache.ctakes.typesystem.type.textspan.Sentence"/> - ->
+          <!- -  org.apache.ctakes.typesystem.type.syntax.Chunk - ->
+          <!- -  org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation - ->
+          <property key="windowAnnotations" value="org.apache.ctakes.typesystem.type.syntax.Chunk"/>  
+          <!- -  <property key="exclusionTags" value="VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,LS,MD,PDT,POS,PP,PP$,RP,TO,WDT,WP,WPS,WRB"/> - ->
+        </properties>
+      </lookupInitializer>
+-->
+      <lookupConsumer className="org.apache.ctakes.dictionary.lookup.ae.AssertionCuePhraseConsumerImpl">
+        <properties>
+          <!-- NOTE(review): disabled RXNORM properties, presumably left over from the RxNorm binding this block was modelled on; confirm before removing.
+          <property key="codingScheme" value="RXNORM"/>
+          <property key="codeMetaField" value="code"/>
+          -->
+          <property key="luceneFilterExtResrcKey" value="AssertionCuePhraseIndexReader"/> <!-- Same resource key as the dictionary's externalResourceKey. -->
+        </properties>
+      </lookupConsumer>
+    </lookupBinding>
+
+	</lookupBindings>
+</lookupSpecification>

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fdt
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fdt?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fdt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fdx
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fdx?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fdx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fnm
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fnm?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.fnm
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.si
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.si?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0.si
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.frq
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.frq?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.frq
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.prx
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.prx?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.prx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.tim
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.tim?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.tim
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.tip
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.tip?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_Lucene40_0.tip
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_nrm.cfe
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_nrm.cfe?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_nrm.cfe
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_nrm.cfs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_nrm.cfs?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/_0_nrm.cfs
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/segments.gen
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/segments.gen?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/segments.gen
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/segments_1
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/segments_1?rev=1438632&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/resources/org/apache/ctakes/dictionary/lookup/assertion_cue_phrase_index/segments_1
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream