You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2016/10/19 21:37:33 UTC

svn commit: r1765723 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngineFit.java

Author: seanfinan
Date: Wed Oct 19 21:37:33 2016
New Revision: 1765723

URL: http://svn.apache.org/viewvc?rev=1765723&view=rev
Log:
uimafit version of assertion, with minor change and refactoring

Added:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngineFit.java

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngineFit.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngineFit.java?rev=1765723&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngineFit.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngineFit.java Wed Oct 19 21:37:33 2016
@@ -0,0 +1,216 @@
+package org.apache.ctakes.assertion.medfacts;
+
+import org.apache.ctakes.assertion.medfacts.i2b2.api.CharacterOffsetToLineTokenConverterCtakesImpl;
+import org.apache.ctakes.assertion.medfacts.i2b2.api.SingleDocumentProcessorCtakes;
+import org.apache.ctakes.assertion.medfacts.types.Concept;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.mitre.jcarafe.jarafe.JarafeMEDecoder;
+import org.mitre.medfacts.i2b2.annotation.PartOfSpeechTagger;
+import org.mitre.medfacts.i2b2.annotation.ScopeParser;
+import org.mitre.medfacts.i2b2.api.ApiConcept;
+import org.mitre.medfacts.i2b2.api.AssertionDecoderConfiguration;
+import org.mitre.medfacts.i2b2.cli.BatchRunner;
+import org.mitre.medfacts.zoner.CharacterOffsetToLineTokenConverter;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.*;
+
+import static org.apache.ctakes.typesystem.type.constants.CONST.ATTR_SUBJECT_FAMILY_MEMBER;
+import static org.apache.ctakes.typesystem.type.constants.CONST.ATTR_SUBJECT_PATIENT;
+
+
+/**
+ * uimaFIT annotator that runs the MedFacts (i2b2) assertion decoder over every {@link Concept}
+ * in a CAS and writes the resulting assertion attributes (polarity, uncertainty, generic,
+ * conditional, subject, confidence) onto the {@link IdentifiedAnnotation} that each concept
+ * points to through its original entity address.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/19/2016
+ */
+public class AssertionAnalysisEngineFit extends JCasAnnotator_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "AssertionAnalysisEngineFit" );
+
+   static public final String ASSERTION_MODEL_PARAM = "assertionModelResource";
+   static public final String SCOPE_MODEL_PARAM = "scopeModelResource";
+   static public final String CUE_MODEL_PARAM = "cueModelResource";
+   static public final String POS_MODEL_PARAM = "posModelResource";
+   static public final String ENABLED_FEATURES_PARAM = "enabledFeaturesResource";
+
+
+   /** Resource path of the assertion model handed to the {@link JarafeMEDecoder}. */
+   @ConfigurationParameter(
+         name = ASSERTION_MODEL_PARAM,
+         defaultValue = "org/apache/ctakes/assertion/models/i2b2.model" )
+   private String _assertionModelPath;
+
+   /** Resource path of the scope model handed to the {@link ScopeParser}. */
+   @ConfigurationParameter(
+         name = SCOPE_MODEL_PARAM,
+         defaultValue = "org/apache/ctakes/assertion/models/scope.model" )
+   private String _scopeModelPath;
+
+   /** Resource path of the cue model handed to the {@link ScopeParser}. */
+   @ConfigurationParameter(
+         name = CUE_MODEL_PARAM,
+         defaultValue = "org/apache/ctakes/assertion/models/cue.model" )
+   private String _cueModelPath;
+
+   /** Resource path of the model handed to the {@link PartOfSpeechTagger}. */
+   @ConfigurationParameter(
+         name = POS_MODEL_PARAM,
+         defaultValue = "org/apache/ctakes/assertion/models/pos.model" )
+   private String _posModelPath;
+
+   /** Resource path of the file listing the enabled feature ids. */
+   @ConfigurationParameter(
+         name = ENABLED_FEATURES_PARAM,
+         defaultValue = "org/apache/ctakes/assertion/models/featureFile11b" )
+   private String _enabledFeaturesPath;
+
+   /** Decoder configuration built once in {@link #initialize} and reused for every document. */
+   private AssertionDecoderConfiguration _assertionDecoderConfiguration;
+
+   /**
+    * Resolves the configured model resources and builds the shared decoder configuration
+    * (scope parser, part-of-speech tagger, enabled feature ids and assertion decoder).
+    *
+    * @param uimaContext context supplied by the framework
+    * @throws ResourceInitializationException if any configured resource cannot be located
+    */
+   @Override
+   public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException {
+      super.initialize( uimaContext );
+      final File assertionModelFile;
+      final String scopeModelFilePath;
+      final String cueModelFilePath;
+      final String posModelFilePath;
+      final File enabledFeaturesFile;
+      try {
+         assertionModelFile = FileLocator.locateFile( _assertionModelPath );
+         scopeModelFilePath = FileLocator.getFullPath( _scopeModelPath );
+         cueModelFilePath = FileLocator.getFullPath( _cueModelPath );
+         posModelFilePath = FileLocator.getFullPath( _posModelPath );
+         enabledFeaturesFile = FileLocator.locateFile( _enabledFeaturesPath );
+      } catch ( FileNotFoundException fnfE ) {
+         throw new ResourceInitializationException( fnfE );
+      }
+      LOGGER.info( "scope model file: " + scopeModelFilePath );
+      LOGGER.info( "cue model file: " + cueModelFilePath );
+      LOGGER.info( "pos model file: " + posModelFilePath );
+      final AssertionDecoderConfiguration assertionDecoderConfiguration = new AssertionDecoderConfiguration();
+      assertionDecoderConfiguration.setScopeParser( new ScopeParser( scopeModelFilePath, cueModelFilePath ) );
+      assertionDecoderConfiguration.setPosTagger( new PartOfSpeechTagger( posModelFilePath ) );
+      final Set<String> enabledFeatureIdSet = BatchRunner.loadEnabledFeaturesFromFile( enabledFeaturesFile );
+      assertionDecoderConfiguration.setEnabledFeatureIdSet( enabledFeatureIdSet );
+      assertionDecoderConfiguration.setAssertionDecoder( new JarafeMEDecoder( assertionModelFile ) );
+      _assertionDecoderConfiguration = assertionDecoderConfiguration;
+   }
+
+
+   /**
+    * Converts every {@link Concept} in the CAS to an {@link ApiConcept}, runs the single-document
+    * assertion processor over them and copies each returned assertion value onto the
+    * {@link IdentifiedAnnotation} referenced by the corresponding concept.
+    * A concept whose target annotation cannot be resolved is logged and skipped so that the
+    * remaining concepts of the document are still processed.
+    *
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jcas ) throws AnalysisEngineProcessException {
+      LOGGER.info( "Starting processing" );
+      final String documentText = jcas.getDocumentText();
+      // List order matters: the keys of the assertion type map are used as indices into this list.
+      final List<ApiConcept> apiConceptList = new ArrayList<>();
+      for ( Concept concept : JCasUtil.select( jcas, Concept.class ) ) {
+         apiConceptList.add( createApiConcept( concept, documentText ) );
+      }
+      final SingleDocumentProcessorCtakes processor = new SingleDocumentProcessorCtakes();
+      processor.setJcas( jcas );
+      processor.setAssertionDecoderConfiguration( _assertionDecoderConfiguration );
+      processor.setContents( documentText );
+      final CharacterOffsetToLineTokenConverter converter = new CharacterOffsetToLineTokenConverterCtakesImpl( jcas );
+      processor.setConverter2( converter );
+      apiConceptList.forEach( processor::addConcept );
+      LOGGER.debug( "BEFORE CALLING processor.processSingleDocument()" );
+      processor.processSingleDocument();
+      LOGGER.debug( "AFTER CALLING processor.processSingleDocument()" );
+      final Map<Integer, String> assertionTypeMap = processor.getAssertionTypeMap();
+      final CasIndexer<Annotation> indexer = new CasIndexer<>( jcas, null );
+      for ( Map.Entry<Integer, String> assertion : assertionTypeMap.entrySet() ) {
+         final ApiConcept originalConcept = apiConceptList.get( assertion.getKey() );
+         final IdentifiedAnnotation annotation = findTargetAnnotation( indexer, originalConcept );
+         if ( annotation == null ) {
+            // Already logged by findTargetAnnotation; keep going with the other concepts.
+            continue;
+         }
+         mapI2B2AssertionValueToCtakes( assertion.getValue(), annotation );
+      }
+      LOGGER.info( "Processing Finished" );
+   }
+
+
+   /**
+    * Builds the decoder-side representation of a concept.
+    * The concept's CAS address is stored as the external id so the concept can be looked up again
+    * after decoding.
+    *
+    * @param concept      concept annotation taken from the CAS
+    * @param documentText full document text; the covered text is cut from it by the concept offsets
+    * @return populated api concept
+    */
+   static private ApiConcept createApiConcept( final Concept concept, final String documentText ) {
+      final int begin = concept.getBegin();
+      final int end = concept.getEnd();
+      final ApiConcept apiConcept = new ApiConcept();
+      apiConcept.setBegin( begin );
+      apiConcept.setEnd( end );
+      apiConcept.setText( documentText.substring( begin, end ) );
+      apiConcept.setType( concept.getConceptType() );
+      apiConcept.setExternalId( concept.getAddress() );
+      return apiConcept;
+   }
+
+
+   /**
+    * Follows the address chain api concept -> {@link Concept} -> {@link IdentifiedAnnotation}.
+    * NOTE(review): whether the indexer can actually return null for an unknown address is not
+    * visible from this class; the checks are defensive so that a failed lookup does not abort the
+    * whole document with a NullPointerException or ClassCastException.
+    *
+    * @param indexer    address index over the annotations of the current CAS
+    * @param apiConcept concept whose external id holds the address of the originating {@link Concept}
+    * @return the annotation that should receive the assertion attributes, or null if either lookup
+    * does not yield an annotation of the expected type
+    */
+   static private IdentifiedAnnotation findTargetAnnotation( final CasIndexer<Annotation> indexer,
+                                                             final ApiConcept apiConcept ) {
+      final Object concept = indexer.lookupByAddress( apiConcept.getExternalId() );
+      if ( !(concept instanceof Concept) ) {
+         LOGGER.error( "no Concept found for concept address " + apiConcept.getExternalId() );
+         return null;
+      }
+      final int entityAddress = ((Concept)concept).getOriginalEntityExternalId();
+      final Object entity = indexer.lookupByAddress( entityAddress );
+      if ( !(entity instanceof IdentifiedAnnotation) ) {
+         LOGGER.error( "no IdentifiedAnnotation found for entity address " + entityAddress );
+         return null;
+      }
+      return (IdentifiedAnnotation)entity;
+   }
+
+
+   /**
+    * Sets all assertion-related attributes of an annotation in one call.
+    *
+    * @param annotation  annotation to update
+    * @param polarity    value for {@code setPolarity}
+    * @param uncertainty value for {@code setUncertainty}
+    * @param generic     value for {@code setGeneric}
+    * @param conditional value for {@code setConditional}
+    * @param subject     value for {@code setSubject}
+    * @param confidence  value for {@code setConfidence}
+    */
+   static private void fillProperties( final IdentifiedAnnotation annotation,
+                                       final int polarity, final int uncertainty,
+                                       final boolean generic, final boolean conditional,
+                                       final String subject, final float confidence ) {
+      annotation.setPolarity( polarity );
+      annotation.setUncertainty( uncertainty );
+      annotation.setGeneric( generic );
+      annotation.setConditional( conditional );
+      annotation.setSubject( subject );
+      annotation.setConfidence( confidence );
+   }
+
+
+   /**
+    * Marks an annotation whose assertion value could not be interpreted: polarity, uncertainty and
+    * confidence are set to -2 and the subject to "skipped".
+    *
+    * @param annotation annotation to mark
+    */
+   static private void fillSkipped( final IdentifiedAnnotation annotation ) {
+      fillProperties( annotation, -2, -2, false, false, "skipped", -2.0f );
+   }
+
+
+   /**
+    * Translates an i2b2 assertion value into cTAKES annotation attributes.
+    * Possible values for assertionType:
+    * present, absent, associated_with_someone_else, conditional, hypothetical, possible.
+    * A null or unrecognized value is logged and the annotation is marked as skipped.
+    * Changed from original implementation by information in https://www.mitre.org/sites/default/files/pdf/10_4676.pdf
+    *
+    * @param assertionType assertion value produced by the decoder, may be null
+    * @param annotation    annotation that receives the mapped attributes
+    */
+   static private void mapI2B2AssertionValueToCtakes( final String assertionType,
+                                                      final IdentifiedAnnotation annotation ) {
+      if ( assertionType == null ) {
+         LOGGER.error( "current assertion type is null" );
+         fillSkipped( annotation );
+         return;
+      }
+      switch ( assertionType ) {
+         case "present":
+            fillProperties( annotation, 1, 0, false, false, ATTR_SUBJECT_PATIENT, 1.0f );
+            break;
+         case "absent":
+            fillProperties( annotation, -1, 0, false, false, ATTR_SUBJECT_PATIENT, 1.0f );
+            break;
+         case "associated_with_someone_else":
+            // The old implementation set the subject to the literal text "CONST.ATTR_SUBJECT_FAMILY_MEMBER".
+            fillProperties( annotation, 1, 0, false, false, ATTR_SUBJECT_FAMILY_MEMBER, 1.0f );
+            break;
+         case "conditional":
+            // The old implementation had no mapping here and left conditional false.
+            fillProperties( annotation, 1, 0, false, true, ATTR_SUBJECT_PATIENT, 1.0f );
+            break;
+         case "hypothetical":
+            // The old implementation set conditional true and generic false.
+            fillProperties( annotation, 1, 0, true, false, ATTR_SUBJECT_PATIENT, 1.0f );
+            break;
+         case "possible":
+            fillProperties( annotation, 1, 1, false, false, ATTR_SUBJECT_PATIENT, 1.0f );
+            break;
+         default:
+            LOGGER.error( "unexpected assertion value returned: " + assertionType );
+            fillSkipped( annotation );
+            break;
+      }
+   }
+
+}