You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2015/02/19 19:06:17 UTC
svn commit: r1660963 [4/19] - in /ctakes/sandbox/timelanes: META-INF/ edu/
edu/mayo/ edu/mayo/bmi/ edu/mayo/bmi/annotation/
edu/mayo/bmi/annotation/knowtator/ org/ org/chboston/ org/chboston/cnlp/
org/chboston/cnlp/anafora/ org/chboston/cnlp/anafora/an...
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TimeRelationConstants.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TimeRelationConstants.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TimeRelationConstants.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TimeRelationConstants.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,17 @@
+package org.chboston.cnlp.iaa.evaluator.temporal;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 4/17/13
+ */
+public interface TimeRelationConstants { // integer codes for temporal relation types; TlinkType hands one code to each of its constants
+ int NO = -1; // no relation; not a valid table index, and TlinkType's int lookup maps it to null
+ int BF = 0; // code used by TlinkType.BEFORE; codes 0..6 also serve as row/column indices of TlinkType's A-to-C table
+ int AF = 1; // code used by TlinkType.AFTER
+ int CN = 2; // code used by TlinkType.CONTAINS
+ int CB = 3; // code used by TlinkType.CONTAINED_BY
+ int BO = 4; // code used by TlinkType.BEGINS_ON
+ int EO = 5; // code used by TlinkType.ENDS_ON
+ int OV = 6; // code used by TlinkType.OVERLAP
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TlinkType.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TlinkType.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TlinkType.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TlinkType.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,163 @@
+package org.chboston.cnlp.iaa.evaluator.temporal;
+
+import org.chboston.cnlp.nlp.annotation.annotation.Annotation;
+import org.chboston.cnlp.nlp.annotation.attribute.Attribute;
+import org.chboston.cnlp.nlp.annotation.attribute.DefaultAttribute;
+import org.chboston.cnlp.nlp.annotation.attribute.DefinedAttributeType;
+import org.chboston.cnlp.nlp.annotation.relation.Relation;
+
+import static org.chboston.cnlp.iaa.evaluator.temporal.TimeRelationConstants.*;
+
+/**
+ * Enumeration of the five temporal relation types used in THYME (before, overlap, contains, begins-on, ends-on)
+ * and their reciprocals (after, overlap, contained_by, ends-on, begins-on)
+ */
+public enum TlinkType {
+ BEFORE( BF ), // A+ < B- where + is end, - is start
+ AFTER( AF ), // A- > B+
+ OVERLAP( OV ), // A- < B- < A+ || B- < A- < B+
+ CONTAINS( CN ), // A- < B- && A+ > B+
+ CONTAINED_BY( CB ), // A- > B- && A+ < B+
+ BEGINS_ON( BO ), // A- = B+
+ ENDS_ON( EO ); // A+ = B-
+// INITIATES( IN )
+// CONTINUES( CT )
+// REINITIATES( RE )
+
+ // TODO refactor to use TlinkAttributeValue enum
+
+ static public TlinkType getTlinkType( final Annotation tlink ) {
+ final Attribute attribute = tlink.getAttribute( DefinedAttributeType.RELATION_TYPE );
+ if ( attribute == null ) {
+ return null;
+ }
+ return getTimeRelationType( attribute.getValue() );
+ }
+
+ static public TlinkType getTimeRelationType( final String name ) {
+ final String upper = name.toUpperCase().replace( "_", "-" );
+ if ( upper.equals( "BEFORE" ) ) {
+ return BEFORE;
+ } else if ( upper.equals( "AFTER" ) ) {
+ return AFTER; // Special case for symmetry
+ } else if ( upper.equals( "OVERLAP" ) || upper.equals( "UNDEFINED" ) || upper.isEmpty() ) {
+ return OVERLAP;
+ } else if ( upper.equals( "CONTAINS" ) ) {
+ return CONTAINS;
+ } else if ( upper.equals( "CONTAINED-BY" ) ) {
+ return CONTAINED_BY; // Special case for symmetry
+ } else if ( upper.equals( "BEGINS-ON" ) || upper.equals( "CONTINUES" ) || upper.equals( "TERMINATES" ) ) {
+ return BEGINS_ON;
+ } else if ( upper.equals( "ENDS-ON" ) || upper.equals( "INITIATES" ) || upper.equals( "REINITIATES" ) ) {
+ return ENDS_ON;
+ }
+ return null;
+ }
+
+
+
+ static private TlinkType getTimeRelationType( final int index ) {
+ switch ( index ) {
+ case BF:
+ return BEFORE;
+ case AF:
+ return AFTER;
+ case CN:
+ return CONTAINS;
+ case CB:
+ return CONTAINED_BY;
+ case BO:
+ return BEGINS_ON;
+ case EO:
+ return ENDS_ON;
+ case OV:
+ return OVERLAP;
+ }
+ return null;
+ }
+
+ // Given relation A to B and relation B to C, return relation A to C
+ // This is a very conservative interpretation.
+ // There are several OV possibilities that are left out because CN is an equal possibility
+ static private final int[][] TML_ABC_ARRAY =
+ //BF, AF, CN, CB, BO, EO, OV A to B
+ { { BF, NO, NO, BF, NO, BF, NO }, // BF
+ { NO, AF, NO, AF, AF, NO, NO }, // AF
+ { BF, AF, CN, NO, AF, BF, NO }, // CN
+ { NO, NO, OV, CB, OV, OV, OV }, // CB B to C
+ { NO, AF, NO, AF, AF, OV, NO }, // BO
+ { BF, AF, NO, BF, OV, BF, NO }, // EO
+ { NO, NO, OV, NO, NO, NO, NO } }; // OV then A to C
+
+ final private int _index;
+
+ private TlinkType( final int index ) {
+ _index = index;
+ }
+
+ private int getIndex() {
+ return _index;
+ }
+
+ /**
+ * @param tlinkTypeBtoC a relation with a start argument coincidental with this relation
+ * @return for this relation A to B and the given relation B to C, return relation A to C
+ */
+ public TlinkType getTimeRelationTypeAtoC( final TlinkType tlinkTypeBtoC ) {
+ // The array elements are fetched [row][column]
+ // Checked and works 7/10/13 spf
+ final int relationIndex = TML_ABC_ARRAY[ tlinkTypeBtoC.getIndex() ][ getIndex() ];
+// System.out.println( "A " + toString() + " B and B " + relationTypeBtoC + " C so A " + getTlinkType( relationIndex ) + " C");
+ return getTimeRelationType( relationIndex );
+ }
+
+ /**
+ * @return the reciprocal Temporal Relation type of (this) relation type
+ */
+ public TlinkType getReciprocal() {
+ switch ( this ) {
+ case BEFORE:
+ return AFTER;
+ case AFTER:
+ return BEFORE;
+ case OVERLAP:
+ return OVERLAP;
+ case CONTAINS:
+ return CONTAINED_BY;
+ case CONTAINED_BY:
+ return CONTAINS;
+ case BEGINS_ON:
+ return ENDS_ON;
+ case ENDS_ON:
+ return BEGINS_ON;
+ }
+ return null;
+ }
+
+ public Attribute getAsAttribute() {
+ final String attributeValue = getAttributeValue( this );
+ return new DefaultAttribute( DefinedAttributeType.RELATION_TYPE, attributeValue );
+ }
+
+ static private String getAttributeValue( final TlinkType tlinkType ) {
+ switch ( tlinkType ) {
+ case BEFORE:
+ return "BEFORE";
+ case AFTER:
+ return "AFTER";
+ case OVERLAP:
+ return "OVERLAP";
+ case CONTAINS:
+ return "CONTAINS";
+ case CONTAINED_BY:
+ return "CONTAINED-BY";
+ case BEGINS_ON:
+ return "BEGINS-ON";
+ case ENDS_ON:
+ return "ENDS-ON";
+ }
+ return null;
+ }
+
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TlinkTypeStore.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TlinkTypeStore.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TlinkTypeStore.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/iaa/evaluator/temporal/TlinkTypeStore.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,27 @@
+package org.chboston.cnlp.iaa.evaluator.temporal;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/1/2014
+ */
+public interface TlinkTypeStore extends Iterable<TlinkType> {
+
+   /**
+    * @param tlinkType relation type to put in this store
+    * @return presumably true if the store changed, as with java.util.Collection#add - confirm against implementations
+    */
+   boolean add( TlinkType tlinkType );
+
+   /**
+    * @return number of relation types held by this store
+    */
+   int size();
+
+   /**
+    * @return true if this store holds no relation types
+    */
+   boolean isEmpty();
+
+   /**
+    * @param tlinkType relation type to look for
+    * @return true if this store holds the given relation type
+    */
+   boolean contains( TlinkType tlinkType );
+
+   /**
+    * @return a store for the reversed argument order; presumably built from TlinkType#getReciprocal - confirm
+    */
+   TlinkTypeStore createReciprocalStore();
+
+   /**
+    * Treating the relations of this store as A to B and the given relations as B to C, derive relations A to C.
+    *
+    * @param tlinkTypesBtoC store of relations B to C, whose start argument B is the end argument of this
+    *                       store's relations
+    * @return store of relations from the start argument A of this store's relations to the end argument C of
+    * the given store's relations, if any can be derived from the two stores, else null
+    */
+   TlinkTypeStore createAtoCstore( TlinkTypeStore tlinkTypesBtoC );
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/AnnotationTypeSegregator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/AnnotationTypeSegregator.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/AnnotationTypeSegregator.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/AnnotationTypeSegregator.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,125 @@
+package org.chboston.cnlp.knowtator.annotation.parser;
+
+import org.chboston.cnlp.nlp.annotation.classtype.ClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.TemporalClassType;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 6/25/12
+ */
+final public class AnnotationTypeSegregator {
+
+   // Accepted slot names for the first / second argument of a umls relation; element 0 is the normalized name
+   static final String[] RELATION_TAGS_1 = { "Argument", "Argument_CU" };
+   static final String[] RELATION_TAGS_2 = { "Related_to", "Related_to_CU" };
+
+   // Accepted slot names for the first / second argument of a time relation; element 0 is the normalized name
+   static final String[] TIME_RELATION_TAGS_1 = { "Event" };
+   static final String[] TIME_RELATION_TAGS_2 = { "related_to" };
+
+   static final String RELATION_ROLE_KEY_1 = RELATION_TAGS_1[ 0 ];
+   static final String RELATION_ROLE_KEY_2 = RELATION_TAGS_2[ 0 ];
+
+   static final String TIME_RELATION_ROLE_KEY_1 = TIME_RELATION_TAGS_1[ 0 ];
+   static final String TIME_RELATION_ROLE_KEY_2 = TIME_RELATION_TAGS_2[ 0 ];
+
+   // static utility holder, never instantiated
+   private AnnotationTypeSegregator() {
+   }
+
+
+   /**
+    * @param entities collection of entities
+    * @return unmodifiable list of all entities in the given collection that are neither events nor timex
+    */
+   static List<Entity> getNamedEntities( final Collection<Entity> entities ) {
+      final List<Entity> namedEntityList = new ArrayList<>();
+      for ( Entity entity : entities ) {
+         final ClassType type = entity.getClassType();
+         if ( type != TemporalClassType.EVENT && type != TemporalClassType.TIMEX ) {
+            namedEntityList.add( entity );
+         }
+      }
+      return Collections.unmodifiableList( namedEntityList );
+   }
+
+   /**
+    * @param entities collection of entities
+    * @return unmodifiable list of all events in the given collection of entities
+    */
+   static List<Entity> getEvents( final Collection<Entity> entities ) {
+      final List<Entity> eventList = new ArrayList<>();
+      for ( Entity entity : entities ) {
+         final ClassType type = entity.getClassType();
+         if ( type == TemporalClassType.EVENT ) {
+            eventList.add( entity );
+         }
+      }
+      return Collections.unmodifiableList( eventList );
+   }
+
+   /**
+    * @param entities collection of entities
+    * @return unmodifiable list of all timex3 times in the given collection of entities
+    */
+   static List<Entity> getTimes( final Collection<Entity> entities ) {
+      final List<Entity> timexList = new ArrayList<>();
+      for ( Entity entity : entities ) {
+         final ClassType type = entity.getClassType();
+         if ( type == TemporalClassType.TIMEX ) {
+            timexList.add( entity );
+         }
+      }
+      return Collections.unmodifiableList( timexList );
+   }
+
+
+   /**
+    * Convert Argument_CU and Related_to_CU to Argument and Related_to.
+    * This will not be necessary in the future when the data will be
+    * post-processed to remove _CU suffixes.
+    * Currently mipacq data does not have the suffixes and sharp data does.
+    *
+    * @param role role (slot) name of a umls relation argument
+    * @return the normalized role name if the role is a known variant, otherwise the role unchanged
+    */
+   static String normalizeUmlsRelationRoleName( final String role ) {
+      for ( String tag1 : RELATION_TAGS_1 ) {
+         if ( tag1.equals( role ) ) {
+            return RELATION_ROLE_KEY_1;
+         }
+      }
+      for ( String tag2 : RELATION_TAGS_2 ) {
+         if ( tag2.equals( role ) ) {
+            return RELATION_ROLE_KEY_2;
+         }
+      }
+      return role;
+   }
+
+
+   /**
+    * Convert any known variant of a time relation role name to its normalized name.
+    * The time relation tags are consulted first, so variants added to TIME_RELATION_TAGS_1 or
+    * TIME_RELATION_TAGS_2 are honored.  Roles that are not time relation tags are then normalized as
+    * umls relation roles, which keeps the conversion of Argument_CU and Related_to_CU that this method
+    * has always performed.
+    *
+    * @param role role (slot) name of a time relation argument
+    * @return the normalized role name if the role is a known variant, otherwise the role unchanged
+    */
+   static String normalizeTimeRelationRoleName( final String role ) {
+      for ( String tag1 : TIME_RELATION_TAGS_1 ) {
+         if ( tag1.equals( role ) ) {
+            return TIME_RELATION_ROLE_KEY_1;
+         }
+      }
+      for ( String tag2 : TIME_RELATION_TAGS_2 ) {
+         if ( tag2.equals( role ) ) {
+            return TIME_RELATION_ROLE_KEY_2;
+         }
+      }
+      // not a time relation tag: fall back to the umls handling so existing callers see unchanged results
+      return normalizeUmlsRelationRoleName( role );
+   }
+
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/KnowtatorTextSourceParser.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/KnowtatorTextSourceParser.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/KnowtatorTextSourceParser.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/KnowtatorTextSourceParser.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,389 @@
+package org.chboston.cnlp.knowtator.annotation.parser;
+
+import edu.mayo.bmi.annotation.knowtator.KnowtatorProject;
+import edu.stanford.smi.protege.model.KnowledgeBase;
+import edu.stanford.smi.protege.model.SimpleInstance;
+import edu.stanford.smi.protege.model.Slot;
+import edu.stanford.smi.protege.model.framestore.FrameStore;
+import edu.stanford.smi.protege.model.framestore.NarrowFrameStore;
+import edu.uchsc.ccp.knowtator.AnnotationUtil;
+import edu.uchsc.ccp.knowtator.KnowtatorProjectUtil;
+import edu.uchsc.ccp.knowtator.MentionUtil;
+import edu.uchsc.ccp.knowtator.Span;
+import edu.uchsc.ccp.knowtator.textsource.TextSource;
+import edu.uchsc.ccp.knowtator.textsource.TextSourceAccessException;
+import edu.uchsc.ccp.knowtator.textsource.TextSourceCollection;
+import edu.uchsc.ccp.knowtator.textsource.TextSourceIterator;
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStore;
+import org.chboston.cnlp.nlp.annotation.annotation.store.ImmutableAnnotationStore;
+import org.chboston.cnlp.nlp.annotation.attribute.Attribute;
+import org.chboston.cnlp.nlp.annotation.attribute.DefaultAttribute;
+import org.chboston.cnlp.nlp.annotation.attribute.DefinedAttributeType;
+import org.chboston.cnlp.nlp.annotation.classtype.ClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.ClassTypeFactory;
+import org.chboston.cnlp.nlp.annotation.entity.DefaultEntity;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.parser.AbstractAnnotationsParser;
+import org.chboston.cnlp.nlp.annotation.relation.Relation;
+import org.chboston.cnlp.nlp.annotation.textspan.DefaultDiscontiguousTextSpan;
+import org.chboston.cnlp.nlp.annotation.textspan.DefaultTextSpan;
+import org.chboston.cnlp.nlp.annotation.textspan.TextSpan;
+
+import java.io.File;
+import java.util.*;
+import java.util.logging.Logger;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 6/26/12
+ */
+final public class KnowtatorTextSourceParser extends AbstractAnnotationsParser {
+
+   static private final Logger LOGGER = Logger.getLogger( "KnowtatorTextSourceParser" );
+
+   final private KnowtatorProject _project;
+   // result of the most recent successful parse; null before any parse and after reset()
+   private AnnotationStore _annotationStore;
+
+
+   /**
+    * @param project knowtator project whose text sources will be parsed
+    */
+   public KnowtatorTextSourceParser( final KnowtatorProject project ) {
+      _project = project;
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public AnnotationStore getAnnotationStore() {
+      return _annotationStore;
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean preParseFile( final String filePath ) {
+      return parseFile( filePath );
+   }
+
+   /**
+    * {@inheritDoc}
+    * <p>
+    * The project's text sources are searched for one whose name equals the given path or the last
+    * element of the given path; the first match is parsed.
+    *
+    * @return false if no text source matches
+    */
+   @Override
+   public boolean parseFile( final String filePath ) {
+      reset();
+      final String filePathName = new File( filePath ).getName();
+      final TextSourceCollection docCollection = _project.getTsc();
+      final TextSourceIterator docCollectionIterator = docCollection.iterator();
+      while ( docCollectionIterator.hasNext() ) {
+         TextSource doc;
+         try {
+            doc = docCollectionIterator.next();
+         } catch ( TextSourceAccessException tsaE ) {
+            // an unreadable text source is logged and skipped so the remaining sources are still searched
+            LOGGER.warning( tsaE.getMessage() );
+            continue;
+         }
+         final String fileName = doc.getName();
+         if ( fileName.equals( filePath ) || fileName.equals( filePathName ) ) {
+            return parseTextSource( doc );
+         }
+      }
+      return false;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return always an empty list
+    */
+   @Override
+   public List<Collection<TextSpan>> parseCoreferenceTextSpans( final String filePath ) {
+      return Collections.emptyList();
+   }
+
+   /**
+    * {@inheritDoc}
+    * <p>
+    * Closes every frame store of the project's knowledge base, then resets this parser.
+    */
+   @Override
+   public void close() {
+      final KnowledgeBase knowledgeBase = _project.getProtegeProject().getKnowledgeBase();
+      final List stores = knowledgeBase.getFrameStores();
+      for ( Object object : stores ) {
+         if ( object instanceof FrameStore ) {
+            ((FrameStore)object).reinitialize();
+            ((FrameStore)object).close();
+         } else if ( object instanceof NarrowFrameStore ) {
+            ((NarrowFrameStore)object).close();
+         } else {
+            LOGGER.warning( "Couldn't close " + object.getClass().getName() );
+         }
+      }
+      reset();
+   }
+
+
+   /**
+    * reset the internally stored information
+    */
+   @Override
+   protected void reset() {
+      super.reset();
+      _annotationStore = null;
+   }
+
+   /**
+    * Parse the annotations of one text source into a new annotation store, replacing any previous store.
+    * The stored word count is -1 if the text of the source cannot be obtained.
+    *
+    * @param doc text source to parse
+    * @return true
+    */
+   public boolean parseTextSource( final TextSource doc ) {
+      reset();
+      final KnowledgeBase knowledgeBase = _project.getProtegeProject().getKnowledgeBase();
+      final AnnotationUtil annotationUtil = _project.getAnnotationUtil();
+      final KnowtatorProjectUtil projectUtil = new KnowtatorProjectUtil( knowledgeBase );
+      final MentionUtil mentionUtil = new MentionUtil( projectUtil );
+      _project.getProtegeProject().setIsReadonly( true );
+      final Collection<SimpleInstance> annotations = annotationUtil.getAnnotations( doc );
+      final Map<String, Entity> entityMap = getEntities( annotationUtil, mentionUtil, _project, annotations );
+      // Add Entity to appropriate collection
+      final List<Entity> entityList = AnnotationTypeSegregator.getNamedEntities( entityMap.values() );
+      final List<Entity> eventList = AnnotationTypeSegregator.getEvents( entityMap.values() );
+      final List<Entity> timexList = AnnotationTypeSegregator.getTimes( entityMap.values() );
+
+      // Add Relation to appropriate collection
+      final List<Relation> umlsRelationList = getRelations( annotationUtil, mentionUtil, _project,
+                                                            AnnotationTypeSegregator.RELATION_TAGS_1,
+                                                            AnnotationTypeSegregator.RELATION_TAGS_2,
+                                                            entityMap, annotations );
+      final List<Relation> timeRelationList = getRelations( annotationUtil, mentionUtil, _project,
+                                                            AnnotationTypeSegregator.TIME_RELATION_TAGS_1,
+                                                            AnnotationTypeSegregator.TIME_RELATION_TAGS_2,
+                                                            entityMap, annotations );
+      int wordCount = -1;
+      try {
+         wordCount = countWords( doc.getText() );
+      } catch ( TextSourceAccessException tsaE ) {
+         LOGGER.warning( tsaE.getMessage() );
+      }
+      final ImmutableAnnotationStore.AnnoteCollectBuilder builder
+            = new ImmutableAnnotationStore.AnnoteCollectBuilder();
+      builder.entities( entityList ).events( eventList ).times( timexList );
+      builder.relations( umlsRelationList ).timeRelations( timeRelationList ).wordCount( wordCount );
+      _annotationStore = builder.build();
+      return true;
+   }
+
+   /**
+    * Count the whitespace-separated words of a text.
+    * The text is trimmed first: splitting untrimmed text yields an empty leading token when the text
+    * starts with whitespace, and a single empty token when the text is empty.
+    *
+    * @param text document text, may be null
+    * @return number of words; 0 for empty or all-whitespace text; -1 (unknown) for null text
+    */
+   static private int countWords( final String text ) {
+      if ( text == null ) {
+         return -1;
+      }
+      final String trimmed = text.trim();
+      if ( trimmed.isEmpty() ) {
+         return 0;
+      }
+      return trimmed.split( "\\s+" ).length;
+   }
+
+   /**
+    * Create an entity for each annotation that has spanned text.
+    *
+    * @param annotationUtil knowtator annotation utility of the project
+    * @param mentionUtil    knowtator mention utility of the project
+    * @param project        knowtator project
+    * @param annotations    knowtator annotations of a text source, may be null
+    * @return map of knowtator id to entity; empty if annotations is null
+    */
+   private Map<String, Entity> getEntities( final AnnotationUtil annotationUtil,
+                                            final MentionUtil mentionUtil,
+                                            final KnowtatorProject project,
+                                            final Iterable<SimpleInstance> annotations ) {
+      final Map<String, Entity> entityMap = new HashMap<>();
+      if ( annotations == null ) {
+         return entityMap;
+      }
+      for ( SimpleInstance simpleAnnotation : annotations ) {
+         final String spannedText = annotationUtil.getText( simpleAnnotation );
+         if ( spannedText == null ) {
+            // RELATION ?
+            continue;
+         }
+
+         final TextSpan textSpan = getTextSpan( annotationUtil, simpleAnnotation );
+         final ClassType classType = getClassType( project, simpleAnnotation, getMutableClassTypes() );
+         final String annotator = getAnnotator( annotationUtil, simpleAnnotation );
+         final String kid = getKnowtatorID( simpleAnnotation );
+         final List<Attribute> attributeList = getAttributeList( annotationUtil, mentionUtil,
+                                                                 simpleAnnotation, getMutableAttributeTypes() );
+         attributeList.add( new DefaultAttribute( DefinedAttributeType.UNIQUE_ID, kid ) );
+         attributeList.add( new DefaultAttribute( DefinedAttributeType.CREATOR, annotator ) );
+         ///////////////////////// CREATE ENTITY /////////////////////////
+         final Entity entity = new DefaultEntity( textSpan, spannedText, classType,
+                                                  attributeList.toArray( new Attribute[ attributeList.size() ] ) );
+         entityMap.put( kid, entity );
+      }
+      return entityMap;
+   }
+
+
+   /**
+    * Relation parsing is not implemented; every argument is currently ignored.
+    *
+    * @return always an empty list
+    */
+   private List<Relation> getRelations( final AnnotationUtil annotationUtil,
+                                        final MentionUtil mentionUtil,
+                                        final KnowtatorProject project,
+                                        final String[] relationTags1, final String[] relationTags2,
+                                        final Map<String, Entity> entityMap,
+                                        final Collection<SimpleInstance> annotations ) {
+      // TODO implement relation parsing; the disabled draft below is kept as the starting point
+      return Collections.emptyList();
+//      final List<Relation> relationList = new ArrayList<Relation>();
+//      if ( annotations == null ) {
+//         return relationList;
+//      }
+//      for ( SimpleInstance simpleAnnotation : annotations ) {
+//         final String spannedText = annotationUtil.getText( simpleAnnotation );
+//         if ( spannedText != null ) {
+//            // ENTITY ?
+//            continue;
+//         }
+//         final ClassType classType = getClassType( project, simpleAnnotation, _relationTypes );
+//         final String annotator = getAnnotator( annotationUtil, simpleAnnotation );
+//         final String kid = getKnowtatorID( simpleAnnotation );
+//         final List<Attribute> attributeList = getAttributeList( annotationUtil, mentionUtil,
+//                                                                 simpleAnnotation, _attributeNames );
+//
+//         ///////////////////////// FIND RELATION ENTITIES /////////////////////////
+//         Entity[] relationEntities = getRelationEntities( annotationUtil, mentionUtil, simpleAnnotation,
+//                                                          entityMap, relationTags1, relationTags2 );
+//
+//         ///////////////////////// CREATE RELATION /////////////////////////
+//         if ( relationEntities != null && relationEntities.length == 2
+//              && relationEntities[0] != null && relationEntities[1] != null ) {
+//            final Relation relation = new IdentifiedRelation( kid, relationEntities[0], relationEntities[1],
+//                                                              classType, annotator,
+//                                                              attributeList.toArray( new Attribute[attributeList.size()] ) );
+//
+//            for ( Relation kidRelation : relationList ) {
+//               if ( kidRelation.getTextSpan().equals( relation.getTextSpan() ) ) {
+//                  System.out.println( relation );
+//                  System.out.println( " Is the same as " );
+//                  System.out.println( kidRelation );
+//               }
+//            }
+//
+//            relationList.add( relation );
+//         }
+//      }
+//      return relationList;
+   }
+
+
+   /**
+    * @return browser text of the annotator of the given annotation, or "Unknown" if it has no annotator
+    */
+   static private String getAnnotator( final AnnotationUtil annotationUtil, final SimpleInstance simpleAnnotation ) {
+      final SimpleInstance siAnnotator = annotationUtil.getAnnotator( simpleAnnotation );
+      if ( siAnnotator != null ) {
+         return siAnnotator.getBrowserText();
+      }
+      return "Unknown";
+   }
+
+   /**
+    * @return the part of the instance name after its last underscore; the whole name (possibly null or empty)
+    * if it has no underscore or ends with one
+    */
+   static private String getKnowtatorID( final SimpleInstance simpleAnnotation ) {
+      String kid = simpleAnnotation.getName();
+      if ( kid != null && !kid.isEmpty() ) {
+         final int uIndex = kid.lastIndexOf( '_' );
+         if ( uIndex >= 0 && uIndex < kid.length() - 1 ) {
+            return kid.substring( uIndex + 1 );
+         }
+      }
+      return kid;
+   }
+
+   /**
+    * Fetch the class type of an annotation and record its name.
+    *
+    * @param project          knowtator project
+    * @param simpleAnnotation knowtator annotation
+    * @param typeNames        collection to which the name of the class type is added
+    * @return class type obtained from the factory for the knowtator class name; the factory receives a null
+    * name if knowtator failed to supply one
+    */
+   static private ClassType getClassType( final KnowtatorProject project,
+                                          final SimpleInstance simpleAnnotation, final Collection<String> typeNames ) {
+      String classTypeName = null;
+      try {
+         // prep for a bug in Knowtator - npe can be thrown in a few places, so rather than account for all, catch
+         classTypeName = project.getKnowtatorClsName( simpleAnnotation );
+      } catch ( NullPointerException npE ) {
+         LOGGER.severe( simpleAnnotation.getBrowserText() + " has no class type" );
+      }
+      final ClassType classType = ClassTypeFactory.getClassType( classTypeName );
+      typeNames.add( classType.getName() );
+      return classType;
+   }
+
+   /**
+    * @return a discontiguous text span if the annotation has more than one distinct span, the single span if
+    * it has exactly one, null if it has none
+    */
+   static private TextSpan getTextSpan( final AnnotationUtil annotationUtil,
+                                        final SimpleInstance simpleAnnotation ) {
+      final java.util.List<Span> simpleSpans = annotationUtil.getSpans( simpleAnnotation );
+      TextSpan textSpan = null;
+      Set<TextSpan> cnlpSpanSet = new HashSet<>();
+      for ( Span simpleSpan : simpleSpans ) {
+         textSpan = new DefaultTextSpan( simpleSpan.getStart(), simpleSpan.getEnd() );
+         cnlpSpanSet.add( textSpan );
+      }
+      if ( cnlpSpanSet.size() > 1 ) {
+         return new DefaultDiscontiguousTextSpan( cnlpSpanSet.toArray( new TextSpan[ cnlpSpanSet.size() ] ) );
+      }
+      return textSpan;
+   }
+
+   /**
+    * Create one attribute per value of each simple (non-complex) slot mention of an annotation.
+    *
+    * @param annotationUtil   knowtator annotation utility of the project
+    * @param mentionUtil      knowtator mention utility of the project
+    * @param simpleAnnotation knowtator annotation
+    * @param attributeNames   collection to which the name of each created attribute is added
+    * @return modifiable list of the created attributes
+    */
+   static private List<Attribute> getAttributeList( final AnnotationUtil annotationUtil, final MentionUtil mentionUtil,
+                                                    final SimpleInstance simpleAnnotation,
+                                                    final Collection<String> attributeNames ) {
+      final List<Attribute> attributeList = new ArrayList<>();
+      final SimpleInstance mention = annotationUtil.getMention( simpleAnnotation );
+      final List<SimpleInstance> slotMentionList = mentionUtil.getSlotMentions( mention );
+      for ( SimpleInstance slotMention : slotMentionList ) {
+         if ( mentionUtil.isComplexSlotMention( slotMention ) ) {
+            // complex slot mentions are handled as relation arguments, not attributes
+            continue;
+         }
+         final Slot slot = mentionUtil.getSlotMentionSlot( slotMention );
+         final String slotName = slot.getName();
+         final List slotValues = mentionUtil.getSlotMentionValues( slotMention );
+         for ( Object slotValue : slotValues ) {
+            final Attribute attribute = new DefaultAttribute( slotName, slotValue.toString() );
+            attributeList.add( attribute );
+            attributeNames.add( attribute.getName() );
+         }
+      }
+      return attributeList;
+   }
+
+   /**
+    * Find the two argument entities of a relation annotation among its complex slot mentions.
+    *
+    * @param relationTags1 slot names accepted for the first argument
+    * @param relationTags2 slot names accepted for the second argument
+    * @param entityMap     map of knowtator id to entity
+    * @return array of length 2; an element is null if the corresponding argument was not found
+    */
+   static private Entity[] getRelationEntities( final AnnotationUtil annotationUtil, final MentionUtil mentionUtil,
+                                                final SimpleInstance simpleAnnotation,
+                                                final Map<String, Entity> entityMap,
+                                                final String[] relationTags1,
+                                                final String[] relationTags2 ) {
+      final Entity[] entities = new Entity[ 2 ];
+      final SimpleInstance mention = annotationUtil.getMention( simpleAnnotation );
+      final List<SimpleInstance> slotMentionList = mentionUtil.getSlotMentions( mention );
+      for ( SimpleInstance slotMention : slotMentionList ) {
+         if ( mentionUtil.isComplexSlotMention( slotMention ) ) {
+            for ( String umlsRelationKey : relationTags1 ) {
+               final SimpleInstance arg = getReferencedAnnotation( mentionUtil, slotMention, umlsRelationKey );
+               if ( arg != null ) {
+                  entities[ 0 ] = entityMap.get( getKnowtatorID( arg ) );
+                  break;
+               }
+            }
+            for ( String umlsRelationKey : relationTags2 ) {
+               final SimpleInstance arg = getReferencedAnnotation( mentionUtil, slotMention, umlsRelationKey );
+               if ( arg != null ) {
+                  entities[ 1 ] = entityMap.get( getKnowtatorID( arg ) );
+                  break;
+               }
+            }
+         }
+      }
+      return entities;
+   }
+
+   /**
+    * @param attributeName slot name that the slot mention must have
+    * @return annotation of the first slot value that is a SimpleInstance; null if the slot name differs or
+    * there is no such value
+    */
+   static private SimpleInstance getReferencedAnnotation( final MentionUtil mentionUtil,
+                                                          final SimpleInstance slotMention,
+                                                          final String attributeName ) {
+      final Slot slot = mentionUtil.getSlotMentionSlot( slotMention );
+      final String slotName = slot.getName();
+      if ( !attributeName.equals( slotName ) ) {
+         return null;
+      }
+      final List slotValues = mentionUtil.getSlotMentionValues( slotMention );
+      for ( Object slotValue : slotValues ) {
+         if ( slotValue instanceof SimpleInstance ) {
+            return mentionUtil.getMentionAnnotation( (SimpleInstance)slotValue );
+         }
+      }
+      return null;
+   }
+
+
+   // public static void main( String[] args ) {
+   // final String pprjFilePath = "C:\\Spiffy\\Data\\knowtatorAnnotations\\Working\\SHARP\\merged_pilot2_take2\\merged_pilot2_s2\\merged_pilot2_s2.pprj";
+   // final String docDirPath = "C:\\Spiffy\\Data\\knowtatorAnnotations\\Working\\SHARP\\merged_pilot2_take2\\merged_pilot2_s2\\docs";
+   // // final String pprjFilePath = "C:\\Spiffy\\Data\\IAA\\TemporalAnnotations\\Unadjudicated Protege\\Set01\\amanda_docset1\\TemporalRelations_ProperCorefSchema_MergedSHARP.pprj";
+   // // final String docDirPath = "C:\\Spiffy\\Data\\IAA\\TemporalAnnotations\\Unadjudicated Protege\\Set01\\amanda_docset1\\doc1";
+   // final KnowtatorPprjParser reader = new KnowtatorPprjParser();
+   // reader.parse( pprjFilePath, docDirPath );
+   // }
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/KnowtatorXmlParser.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/KnowtatorXmlParser.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/KnowtatorXmlParser.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/knowtator/annotation/parser/KnowtatorXmlParser.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,584 @@
+package org.chboston.cnlp.knowtator.annotation.parser;
+
+import org.chboston.cnlp.nlp.annotation.attribute.Attribute;
+import org.chboston.cnlp.nlp.annotation.attribute.DefaultAttribute;
+import org.chboston.cnlp.nlp.annotation.attribute.DefinedAttributeType;
+import org.chboston.cnlp.nlp.annotation.classtype.ClassTypeFactory;
+import org.chboston.cnlp.nlp.annotation.entity.DefaultEntity;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.parser.AbstractAnnotationXmlParser;
+import org.chboston.cnlp.nlp.annotation.relation.DefaultRelation;
+import org.chboston.cnlp.nlp.annotation.relation.Relation;
+import org.chboston.cnlp.nlp.annotation.textspan.DefaultDiscontiguousTextSpan;
+import org.chboston.cnlp.nlp.annotation.textspan.DiscontiguousTextSpan;
+import org.chboston.cnlp.nlp.annotation.textspan.EntityTextSpan;
+import org.chboston.cnlp.nlp.annotation.textspan.TextSpan;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.Text;
+import org.jdom.input.SAXBuilder;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.logging.Logger;
+
+import static org.chboston.cnlp.nlp.annotation.annotation.store.ImmutableAnnotationStore.AnnoteCollectBuilder;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 5/8/12
+ */
+final public class KnowtatorXmlParser extends AbstractAnnotationXmlParser {
+
+ // Logger shared by all methods of this class, obtained under the name "KnowtatorXmlParser"
+ static private final Logger LOGGER = Logger.getLogger( "KnowtatorXmlParser" );
+
+ // When true, getRelations(..) keeps a relation only if no earlier relation had an equal text span
+ static private final boolean CULL_RELATION_REPEATS = true;
+
+ /**
+  * Decides whether a file is a Knowtator xml file by building the document and checking its root element.
+  *
+  * @param filePath path to file with annotation information
+  * @return true if the document can be built and its root element is named "annotations";
+  * false otherwise. A build failure is logged and reported as false.
+  */
+ static public boolean canParse( final String filePath ) {
+    try {
+       final Element root = new SAXBuilder().build( filePath ).getRootElement();
+       if ( root == null ) {
+          return false;
+       }
+       // knowtator has a primary element named "annotations"
+       return root.getName().equals( "annotations" );
+    } catch ( JDOMException | IOException buildE ) {
+       LOGGER.severe( buildE.getMessage() );
+       return false;
+    }
+ }
+
+
+ // In the XML file:
+ // <annotation> is a class (mention) id, annotator id, span, text, and creation date
+ //
+ // <classMention> is basically a "type" of annotation, e.g. classtype type or TIMEX. It may have a slot (mention).
+ // <classMention> always has the id of the annotation to which it applies
+ //
+ // <stringSlotMention> is kind of like an attribute of an annotation, e.g. cui.
+ // <stringSlotMention> always has the id of the annotation to which it applies. There is also <booleanSlotMention>
+ //
+ // If an <annotation> does not have a span and text, then it refers to a Relation.
+ // There will be a <classMention> with the id of the <annotation>.
+ // That <classMention> will have a "type", and it will also have a <hasSlotMention> with an id for each term.
+ // Each of those ids names a <complexSlotMention>: the one whose <mentionSlot> has id="Argument" points to the
+ // first term for the relation, while the one whose <mentionSlot> has id="Related_to" points to the second term.
+ // In both cases the id of the term's <annotation> is the value of <complexSlotMentionValue>.
+ //
+ //
+ // <annotation>
+ // <mention id="Schema_Instance_1" />
+ // <annotator id="Me">CU annotator ,</annotator>
+ // <creationDate>Sat Feb 05 12:50:30 MST 2011</creationDate>
+ // </annotation>
+ //
+ // <classMention id="Schema_Instance_1">
+ // <mentionClass id="manages/treats">manages/treats</mentionClass>
+ // <hasSlotMention id="Schema_Instance_11" />
+ // <hasSlotMention id="Schema_Instance_22" />
+ // </classMention>
+ //
+ // <complexSlotMention id="Schema_Instance_11">
+ // <mentionSlot id="Argument" />
+ // <complexSlotMentionValue value="Schema_Instance_2" />
+ // </complexSlotMention>
+ //
+ // <complexSlotMention id="Schema_Instance_22">
+ // <mentionSlot id="Related_to" />
+ // <complexSlotMentionValue value="Schema_Instance_3" />
+ // </complexSlotMention>
+ //
+ // <annotation>
+ // <mention id="Schema_Instance_2" />
+ // <annotator id="Me">CU annotator ,</annotator>
+ // <span start="127" end="144" />
+ // <spannedText>CHOP chemotherapy</spannedText>
+ // <creationDate>Sat Feb 05 12:48:14 MST 2011</creationDate>
+ // </annotation>
+ //
+ // <annotation>
+ // <mention id="Schema_Instance_3" />
+ // <annotator id="NotMe">cTAKES , Mayo Clinic</annotator>
+ // <span start="59" end="78" />
+ // <spannedText>large-cell lymphoma</spannedText>
+ // </annotation>
+
+
+ /**
+  * {@inheritDoc}
+  * <p>
+  * This implementation resets the parser, builds the xml document and registers the class type names,
+  * relation role names and attribute names that occur in it. No entities or relations are created.
+  * A null, empty or unreadable path yields false, as does a document that cannot be built (logged).
+  */
+ @Override
+ public boolean preParseFile( final String xmlFilePath ) {
+    reset();
+    if ( xmlFilePath == null || xmlFilePath.isEmpty() ) {
+       return false;
+    }
+    final File xmlFile = new File( xmlFilePath );
+    if ( !xmlFile.canRead() ) {
+       return false;
+    }
+    try {
+       final Element root = new SAXBuilder().build( xmlFile ).getRootElement();
+
+       // class types : descriptions of the classMention elements
+       final Collection<String> classTypeNames = new HashSet<>();
+       for ( AnnotationInfo annotationInfo : getAnnotInfoMap( root ).values() ) {
+          classTypeNames.add( annotationInfo.getDescription() );
+       }
+       getMutableClassTypes().addAll( classTypeNames );
+
+       // relation types : descriptions of the complexSlotMention elements
+       final Collection<String> relationTypeNames = new HashSet<>();
+       for ( RelationLink link : getRelationLinkMap( root ).values() ) {
+          relationTypeNames.add( link.getDescription() );
+       }
+       getMutableRelationTypes().addAll( relationTypeNames );
+
+       // attribute types : names of the string and boolean slot mentions
+       final Collection<String> attributeNames = new HashSet<>();
+       for ( Attribute attribute : getAnnotAttributeMap( root ).values() ) {
+          attributeNames.add( attribute.getName() );
+       }
+       getMutableAttributeTypes().addAll( attributeNames );
+       return true;
+    } catch ( JDOMException | IOException buildE ) {
+       LOGGER.severe( buildE.getMessage() );
+       return false;
+    }
+ }
+
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public boolean parseFile( final String xmlFilePath ) {
+ reset();
+ if ( xmlFilePath == null || xmlFilePath.length() == 0 ) {
+ return false;
+ }
+ final File xmlFile = new File( xmlFilePath );
+ if ( !xmlFile.canRead() ) {
+ return false;
+ }
+ final SAXBuilder saxBuilder = new SAXBuilder();
+ try {
+ final Document document = saxBuilder.build( xmlFile );
+ final Element rootElement = document.getRootElement();
+ final Map<String, AnnotationInfo> infoMap = getAnnotInfoMap( rootElement );
+ final Map<String, Attribute> attributeMap = getAnnotAttributeMap( rootElement );
+ final Map<String, Entity> entityMap = getEntityMap( rootElement, infoMap, attributeMap );
+ final Map<String, RelationLink> relationLinkMap = getRelationLinkMap( rootElement );
+ final List<Relation> relationList = getRelations( rootElement,
+ AnnotationTypeSegregator.RELATION_ROLE_KEY_1,
+ AnnotationTypeSegregator.RELATION_ROLE_KEY_2,
+ entityMap, infoMap, relationLinkMap, attributeMap );
+ final List<Relation> timeRelationList = getRelations( rootElement,
+ AnnotationTypeSegregator.TIME_RELATION_ROLE_KEY_1,
+ AnnotationTypeSegregator.TIME_RELATION_ROLE_KEY_2,
+ entityMap,
+ infoMap,
+ relationLinkMap,
+ attributeMap );
+ final List<Entity> entityList = AnnotationTypeSegregator.getNamedEntities( entityMap.values() );
+ final List<Entity> eventList = AnnotationTypeSegregator.getEvents( entityMap.values() );
+ final List<Entity> timexList = AnnotationTypeSegregator.getTimes( entityMap.values() );
+
+ final AnnoteCollectBuilder builder = new AnnoteCollectBuilder();
+ builder.entities( entityList ).events( eventList ).times( timexList );
+ builder.relations( relationList ).timeRelations( timeRelationList ).wordCount( -1 );
+ _annotationStore = builder.build();
+ return true;
+ } catch ( JDOMException jdomE ) {
+ LOGGER.severe( jdomE.getMessage() );
+ return false;
+ } catch ( IOException ioE ) {
+ LOGGER.severe( ioE.getMessage() );
+ return false;
+ }
+ }
+
+ /**
+  * Collects one AnnotationInfo per "classMention" child of the root element.
+  * The info holds the text of the "mentionClass" child and the ids of all "hasSlotMention" children.
+  *
+  * @param rootElement root xml element
+  * @return map of classMention ids and AnnotationInfo; an empty map if there are no classMention elements
+  */
+ static private Map<String, AnnotationInfo> getAnnotInfoMap( final Element rootElement ) {
+    final List<Element> classMentions = rootElement.getChildren( "classMention" );
+    if ( classMentions == null || classMentions.isEmpty() ) {
+       return Collections.emptyMap();
+    }
+    final Map<String, AnnotationInfo> infoById = new HashMap<>( classMentions.size() );
+    for ( Element classMention : classMentions ) {
+       // Associated AttributeType Values stored elsewhere : slotMention
+       final List<Element> slotReferences = classMention.getChildren( "hasSlotMention" );
+       final List<String> slotIds = new ArrayList<>( slotReferences.size() );
+       for ( Element slotReference : slotReferences ) {
+          slotIds.add( slotReference.getAttributeValue( "id" ) );
+       }
+       infoById.put( classMention.getAttributeValue( "id" ),
+                     new AnnotationInfo( classMention.getChildText( "mentionClass" ), slotIds ) );
+    }
+    return infoById;
+ }
+
+ /**
+  * Collects one RelationLink per "complexSlotMention" child of the root element.
+  * The link description is the id of the "mentionSlot" child after both role-name normalizations,
+  * and the linked id is the value of the "complexSlotMentionValue" child.
+  * A complexSlotMention that lacks either child is logged and skipped instead of failing the whole parse.
+  *
+  * @param rootElement xml root element
+  * @return map of elementIDs and Relation Links; an empty map if there are no complexSlotMention elements
+  */
+ static private Map<String, RelationLink> getRelationLinkMap( final Element rootElement ) {
+    final List<Element> relationElementsList = rootElement.getChildren( "complexSlotMention" );
+    if ( relationElementsList.isEmpty() ) {
+       return Collections.emptyMap();
+    }
+    final Map<String, RelationLink> relationLinkMap = new HashMap<>( relationElementsList.size() );
+    for ( Element relationElement : relationElementsList ) {
+       final String id = relationElement.getAttributeValue( "id" );
+       final Element slotElement = relationElement.getChild( "mentionSlot" );
+       final Element valueElement = relationElement.getChild( "complexSlotMentionValue" );
+       if ( slotElement == null || valueElement == null ) {
+          // getChild(..) returns null for an absent child; dereferencing it would abort the parse
+          LOGGER.warning( "Skipping incomplete complexSlotMention : " + id );
+          continue;
+       }
+       String description = slotElement.getAttributeValue( "id" );
+       description = AnnotationTypeSegregator.normalizeUmlsRelationRoleName( description );
+       description = AnnotationTypeSegregator.normalizeTimeRelationRoleName( description );
+       final String linkedId = valueElement.getAttributeValue( "value" );
+       relationLinkMap.put( id, new RelationLink( description, linkedId ) );
+    }
+    return relationLinkMap;
+ }
+
+// <annotation>
+// <mention id="SHARe_Jan18_2012_base_Instance_90730" /> <------ 730
+// <annotator id="SHARe_Aug09_2011_base_Instance_10001">David , Harvard</annotator>
+// <span start="49" end="59" />
+// <spannedText>2013-08-14</spannedText>
+// </annotation>
+
+
+// <classMention id="SHARe_Jan18_2012_base_Instance_90715">
+// <mentionClass id="Disease_Disorder">Disease_Disorder</mentionClass>
+// <hasSlotMention id="SHARe_Jan18_2012_base_Instance_90718" />
+// <hasSlotMention id="SHARe_Jan18_2012_base_Instance_90723" /> ------> 723
+// <hasSlotMention id="SHARe_Jan18_2012_base_Instance_90724" />
+// <hasSlotMention id="SHARe_Jan18_2012_base_Instance_90727" />
+// </classMention>
+
+
+// <complexSlotMention id="SHARe_Jan18_2012_base_Instance_90723"> <------ 723
+// <mentionSlot id="temporal_expression" />
+// <complexSlotMentionValue value="SHARe_Jan18_2012_base_Instance_90730" /> ------> 730
+// </complexSlotMention>
+
+// <classMention id="SHARe_Jan18_2012_base_Instance_90730"> <------ 730
+// <mentionClass id="TIMEX3">TIMEX3</mentionClass>
+// <hasSlotMention id="SHARe_Jan18_2012_base_Instance_90732" /> ------> 732
+// </classMention>
+
+// <stringSlotMention id="SHARe_Jan18_2012_base_Instance_90732"> <------ 732
+// <mentionSlot id="type" />
+// <stringSlotMentionValue value="DATE" />
+// </stringSlotMention>
+
+
+ /**
+  * Get CUI, reality, etc.
+  * Creates one Attribute per "stringSlotMention" and per "booleanSlotMention" child of the root element.
+  * The attribute name is the id of the "mentionSlot" child; the value is read from the matching
+  * "stringSlotMentionValue" or "booleanSlotMentionValue" child.
+  * A slot mention that lacks either child is logged and skipped instead of failing the whole parse.
+  *
+  * @param rootElement xml root element
+  * @return map of elementIDs and Attributes; an empty map if there are no such slot mentions
+  */
+ static private Map<String, Attribute> getAnnotAttributeMap( final Element rootElement ) {
+    final List<Element> attributeElementsList = rootElement.getChildren( "stringSlotMention" );
+    final List<Element> boolElementsList = rootElement.getChildren( "booleanSlotMention" );
+    if ( attributeElementsList.isEmpty() && boolElementsList.isEmpty() ) {
+       return Collections.emptyMap();
+    }
+    final Map<String, Attribute> attributeMap = new HashMap<>(
+          attributeElementsList.size() + boolElementsList.size() );
+    // string mentions first, then boolean mentions, so a boolean mention wins on a shared id as before
+    addSlotAttributes( attributeElementsList, "stringSlotMentionValue", attributeMap );
+    addSlotAttributes( boolElementsList, "booleanSlotMentionValue", attributeMap );
+    return attributeMap;
+ }
+
+ /**
+  * Adds one Attribute to the map for each complete slot mention element.
+  *
+  * @param slotElements     slot mention elements of a single kind
+  * @param valueElementName name of the child element that carries the "value" attribute
+  * @param attributeMap     map of elementIDs and Attributes that receives the new attributes
+  */
+ static private void addSlotAttributes( final List<Element> slotElements,
+                                        final String valueElementName,
+                                        final Map<String, Attribute> attributeMap ) {
+    for ( Element slotElement : slotElements ) {
+       final String id = slotElement.getAttributeValue( "id" );
+       final Element nameElement = slotElement.getChild( "mentionSlot" );
+       final Element valueElement = slotElement.getChild( valueElementName );
+       if ( nameElement == null || valueElement == null ) {
+          // getChild(..) returns null for an absent child; dereferencing it would abort the parse
+          LOGGER.warning( "Skipping incomplete " + slotElement.getName() + " : " + id );
+          continue;
+       }
+       attributeMap.put( id, new DefaultAttribute( nameElement.getAttributeValue( "id" ),
+                                                   valueElement.getAttributeValue( "value" ) ) );
+    }
+ }
+
+
+ // NOTE(review): presumably the annotator name carried by consensus (adjudicated) annotations - confirm.
+ // Nothing in this class reads this constant.
+ static private final String CONSENSUS_ANNOTATOR = "consensus set annotator team";
+
+ /**
+  * Creates an Entity for each "annotation" element that has at least one span and one spannedText child,
+  * a "mention" child, and a known AnnotationInfo for the mention id.
+  * Each entity receives the attributes linked through its AnnotationInfo plus a unique id attribute
+  * (the mention id) and a creator attribute (the text of the "annotator" child).
+  *
+  * @param rootElement  the root xml element in an annotation xml file
+  * @param infoMap      map of entityIDs and associated AnnotationInfo
+  * @param attributeMap map of attributes per AnnotationInfo
+  * @return map of entityIDs and Knowtator Entities
+  */
+ private Map<String, Entity> getEntityMap( final Element rootElement,
+                                           final Map<String, AnnotationInfo> infoMap,
+                                           final Map<String, Attribute> attributeMap ) {
+    final Map<String, Entity> entitiesById = new HashMap<>();
+    final List<Element> annotations = rootElement.getChildren( "annotation" );
+    for ( Element annotation : annotations ) {
+       final List<Element> spans = annotation.getChildren( "span" );
+       final List<Element> texts = annotation.getChildren( "spannedText" );
+       final boolean hasSpan = spans != null && !spans.isEmpty();
+       if ( !hasSpan || texts.isEmpty() ) {
+          // spanless annotation, e.g. a relation;
+          continue;
+       }
+       final Element mention = annotation.getChild( "mention" );
+       if ( mention == null ) {
+          continue;
+       }
+       final String entityId = mention.getAttributeValue( "id" );
+       final AnnotationInfo info = infoMap.get( entityId );
+       if ( info == null ) {
+          continue;
+       }
+       final TextSpan textSpan = createTextSpan( spans );
+       if ( textSpan.equals( BAD_TEXT_SPAN ) ) {
+          continue;
+       }
+       final String spannedText = createSpannedText( texts, textSpan );
+       // linked attributes first, then the bookkeeping attributes
+       final List<Attribute> attributes = getAttributeList( info, attributeMap );
+       attributes.add( new DefaultAttribute( DefinedAttributeType.UNIQUE_ID, entityId ) );
+       attributes.add( new DefaultAttribute( DefinedAttributeType.CREATOR,
+                                             annotation.getChildText( "annotator" ) ) );
+       final Attribute[] attributeArray = attributes.toArray( new Attribute[ attributes.size() ] );
+       entitiesById.put( entityId,
+                         new DefaultEntity( textSpan, spannedText,
+                                            ClassTypeFactory.getClassType( info.getDescription() ),
+                                            attributeArray ) );
+    }
+    return entitiesById;
+ }
+
+
+ /**
+  * Produces the text for a span. For anything but a DefaultDiscontiguousTextSpan this is the text of the
+  * first text element. For a discontiguous span the element texts are split on "..." and, when there is
+  * exactly one fragment per sub-span, each fragment is written at its sub-span's offset into a
+  * blank-filled buffer as long as the whole span. Otherwise the text of the first element is used.
+  *
+  * @param textElementList List of xml elements representing text
+  * @param span            a single or multiple span
+  * @return a representation of the text demarcated by the given span
+  */
+ // This is extremely ugly, but useful
+ static private String createSpannedText( final List<Element> textElementList, final TextSpan span ) {
+    if ( span instanceof DefaultDiscontiguousTextSpan ) {
+       final List<String> fragments = new ArrayList<>();
+       for ( Element textElement : textElementList ) {
+          for ( String fragment : createSpannedText( textElement ).split( "\\.\\.\\." ) ) {
+             fragments.add( fragment.trim() );
+          }
+       }
+       final DiscontiguousTextSpan multiSpan = (DiscontiguousTextSpan)span;
+       final int spanCount = multiSpan.getTextSpanCount();
+       if ( spanCount == fragments.size() ) {
+          final char[] buffer = new char[ span.getEndIndex() - span.getStartIndex() ];
+          Arrays.fill( buffer, ' ' );
+          final int spanStart = span.getStartIndex();
+          for ( int i = 0; i < spanCount; i++ ) {
+             final String fragment = fragments.get( i );
+             final TextSpan subSpan = multiSpan.getTextSpan( i );
+             final int offset = subSpan.getStartIndex() - spanStart;
+             final int length = Math.min( fragment.length(), subSpan.getLength() );
+             // a fragment that would run past the end of the buffer is left out
+             if ( offset + length <= buffer.length ) {
+                fragment.getChars( 0, length, buffer, offset );
+             }
+          }
+          return new String( buffer );
+       }
+    }
+    // contiguous span, or fragment count does not match the sub-span count
+    return createSpannedText( textElementList.get( 0 ) );
+ }
+
+ /**
+  * Looks up the attribute for each id linked to the given annotationInfo. Ids without an attribute in
+  * the map are ignored. The returned list is new and may be modified by the caller.
+  *
+  * @param annotationInfo info whose linked ids are resolved
+  * @param attributeMap   map of entityIDs and attributes
+  * @return a list of attributes for the given annotationInfo
+  */
+ static private List<Attribute> getAttributeList( final AnnotationInfo annotationInfo,
+                                                  final Map<String, Attribute> attributeMap ) {
+    final List<Attribute> linkedAttributes = new ArrayList<>();
+    for ( String linkedId : annotationInfo.getLinkedIds() ) {
+       final Attribute linkedAttribute = attributeMap.get( linkedId );
+       if ( linkedAttribute != null ) {
+          linkedAttributes.add( linkedAttribute );
+       }
+    }
+    return linkedAttributes;
+ }
+
+ /**
+  * {@inheritDoc}
+  * <p>
+  * This implementation reads the "start" and "end" attributes of the span element as integers.
+  * If either is not a parsable integer the problem is logged and BAD_TEXT_SPAN is returned.
+  */
+ @Override
+ protected TextSpan createEntityTextSpan( final Element spanElement ) {
+    final String startText = spanElement.getAttributeValue( "start" );
+    final String endText = spanElement.getAttributeValue( "end" );
+    try {
+       return new EntityTextSpan( Integer.parseInt( startText ), Integer.parseInt( endText ) );
+    } catch ( NumberFormatException nfE ) {
+       LOGGER.severe( nfE.getMessage() );
+       return BAD_TEXT_SPAN;
+    }
+ }
+
+ /**
+  * Reads the spanned text from an element that is expected to hold exactly one content item.
+  *
+  * @param spannedTextElement an xml element with information on an annotation's spanned text
+  * @return spanned text, or BAD_SPANNED_TEXT (after logging) if the element does not hold exactly one item
+  */
+ static private String createSpannedText( final Element spannedTextElement ) {
+    final List<Text> contents = spannedTextElement.getContent();
+    if ( contents != null && contents.size() == 1 ) {
+       return contents.get( 0 ).getText();
+    }
+    LOGGER.severe( "Bad Content : " + contents );
+    return BAD_SPANNED_TEXT;
+ }
+
+ /**
+  * Builds relations from the "annotation" elements that have no span. For each such annotation the
+  * linked ids of its AnnotationInfo are resolved through the relation link map; a link whose description
+  * equals the first key supplies the first argument and one that equals the second key supplies the
+  * second. A relation is created once both arguments are found. It carries the linked attributes plus a
+  * unique id attribute (the mention id) and a creator attribute (the text of the "annotator" child).
+  * While CULL_RELATION_REPEATS is true, a relation whose text span equals that of an earlier relation
+  * is dropped. Annotations without a "mention" child or without a mention id are skipped.
+  *
+  * @param rootElement            xml root element
+  * @param relationEntityLinkKey1 id key for first relation argument (Entity)
+  * @param relationEntityLinkKey2 id key for second relation argument (Entity)
+  * @param entityMap              map of elementIDs and Entities
+  * @param annotationInfoMap      map of entityIDs and associated AnnotationInfo
+  * @param relationLinkMap        map of elementIDs and Relation Links
+  * @param attributeMap           map of elementIDs and Attributes
+  * @return unmodifiable list of Relations created with all the given information;
+  * an empty list if the entity, annotation info or relation link map is null or empty
+  */
+ static private List<Relation> getRelations( final Element rootElement,
+                                             final String relationEntityLinkKey1, final String relationEntityLinkKey2,
+                                             final Map<String, Entity> entityMap,
+                                             final Map<String, AnnotationInfo> annotationInfoMap,
+                                             final Map<String, RelationLink> relationLinkMap,
+                                             final Map<String, Attribute> attributeMap ) {
+    if ( entityMap == null || entityMap.isEmpty() || annotationInfoMap == null || annotationInfoMap.isEmpty()
+         || relationLinkMap == null || relationLinkMap.isEmpty() ) {
+       return Collections.emptyList();
+    }
+    final List<Element> annotationElementList = rootElement.getChildren( "annotation" );
+    final List<Relation> relationList = new ArrayList<>();
+    final Collection<TextSpan> uniqueRelationSpans = new HashSet<>();
+    for ( Element annotationElement : annotationElementList ) {
+       final List<Element> spanElementList = annotationElement.getChildren( "span" );
+       if ( spanElementList == null || !spanElementList.isEmpty() ) {
+          // entity annotation; for a relation there should be no span
+          continue;
+       }
+       final Element infoElement = annotationElement.getChild( "mention" );
+       final String relationId = (infoElement == null) ? null : infoElement.getAttributeValue( "id" );
+       if ( relationId == null || relationId.isEmpty() ) {
+          // Can't do much without an id. getAttributeValue(..) returns null for an absent attribute,
+          // so null must be checked as well as the empty string.
+          continue;
+       }
+       final AnnotationInfo annotationInfo = annotationInfoMap.get( relationId );
+       if ( annotationInfo == null ) {
+          continue;
+       }
+       final Collection<String> linkedIds = annotationInfo.getLinkedIds();
+       if ( linkedIds == null ) {
+          continue;
+       }
+       Entity entity1 = null;
+       Entity entity2 = null;
+       for ( String associationId : linkedIds ) {
+          final RelationLink relationLink = relationLinkMap.get( associationId );
+          if ( relationLink == null ) {
+             continue;
+          }
+          final String role = relationLink.getDescription();
+          if ( role.equals( relationEntityLinkKey1 ) ) {
+             entity1 = entityMap.get( relationLink.getRelatedLinkId() );
+          } else if ( role.equals( relationEntityLinkKey2 ) ) {
+             entity2 = entityMap.get( relationLink.getRelatedLinkId() );
+          }
+          if ( entity1 != null && entity2 != null ) {
+             // both arguments resolved; remaining links are not needed
+             break;
+          }
+       }
+       if ( entity1 == null || entity2 == null ) {
+          continue;
+       }
+       // Get the annotator name
+       final String annotatorName = annotationElement.getChildText( "annotator" );
+       final List<Attribute> attributeList = getAttributeList( annotationInfo, attributeMap );
+       attributeList.add( new DefaultAttribute( DefinedAttributeType.UNIQUE_ID, relationId ) );
+       attributeList.add( new DefaultAttribute( DefinedAttributeType.CREATOR, annotatorName ) );
+       final Relation relation = new DefaultRelation( entity1, entity2,
+                                                      ClassTypeFactory.getClassType( annotationInfo.getDescription() ),
+                                                      attributeList.toArray( new Attribute[ attributeList.size() ] ) );
+       if ( uniqueRelationSpans.add( relation.getTextSpan() ) || !CULL_RELATION_REPEATS ) {
+          // Did not already have this span from another annotator.
+          relationList.add( relation );
+       }
+    }
+    return Collections.unmodifiableList( relationList );
+ }
+
+
+ ///////////////////////////////////////////////////////////
+ // INNER CLASSES
+ ///////////////////////////////////////////////////////////
+
+ /**
+  * Holds Annotation Description and list of associated elementIDs (for resolution of attributes, etc. later).
+  * Instances are immutable: the linked ids are copied on construction and exposed as an unmodifiable list.
+  */
+ static final protected class AnnotationInfo {
+    // List of all the associations - associated attributes etc. stored elsewhere
+    private final List<String> __linkedIdsList;
+    private final String __description;
+
+    /**
+     * @param description   description of the annotation
+     * @param linkedIdsList ids of the associated elements. The list is copied, so later changes to it
+     *                      do not affect this object. Null is treated as an empty list.
+     */
+    public AnnotationInfo( final String description, final List<String> linkedIdsList ) {
+       __description = description;
+       if ( linkedIdsList == null ) {
+          __linkedIdsList = Collections.emptyList();
+       } else {
+          __linkedIdsList = Collections.unmodifiableList( new ArrayList<String>( linkedIdsList ) );
+       }
+    }
+
+    /**
+     * @return unmodifiable list of the associated element ids, never null
+     */
+    public List<String> getLinkedIds() {
+       return __linkedIdsList;
+    }
+
+    /**
+     * @return the description given at construction
+     */
+    public String getDescription() {
+       return __description;
+    }
+ }
+
+ /**
+  * Immutable pair of a link description and the elementID of the annotation that the link points to
+  */
+ static final protected class RelationLink {
+    private final String __description;
+    private final String __associatedLinkId;
+
+    /**
+     * @param description      description of the link
+     * @param associatedLinkId elementID of the linked annotation
+     */
+    public RelationLink( final String description, final String associatedLinkId ) {
+       __associatedLinkId = associatedLinkId;
+       __description = description;
+    }
+
+    /**
+     * @return the description of the link
+     */
+    public String getDescription() {
+       return __description;
+    }
+
+    /**
+     * @return the elementID of the linked annotation
+     */
+    public String getRelatedLinkId() {
+       return __associatedLinkId;
+    }
+ }
+
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/Annotation.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/Annotation.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/Annotation.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/Annotation.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,111 @@
+package org.chboston.cnlp.nlp.annotation.annotation;
+
+import org.chboston.cnlp.nlp.annotation.attribute.Attribute;
+import org.chboston.cnlp.nlp.annotation.attribute.AttributeType;
+import org.chboston.cnlp.nlp.annotation.classtype.ClassType;
+import org.chboston.cnlp.nlp.annotation.textspan.TextSpan;
+
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * An annotation over text: exposes its text span, spanned text, class type and attributes.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 5/8/12
+ */
+public interface Annotation {
+
+ /**
+  * @return the text span covered by this annotation
+  */
+ TextSpan getTextSpan();
+
+ /**
+  * @return the text for the entire text span of this annotation
+  */
+ String getSpannedText();
+
+ /**
+  * @return a representation of the text for the entire text span of this annotation.
+  * Most useful for relations
+  */
+ String getSpannedTextRepresentation();
+
+ /**
+  * @return the class type of this annotation, e.g. "sign or symptom"
+  */
+ ClassType getClassType();
+
+ /**
+  * @param classType a class type of interest
+  * @return true if this annotation is of the given class type
+  */
+ boolean isClassType( ClassType classType );
+
+ /**
+  * @return the names of all the attributes registered for this annotation
+  */
+ @Deprecated
+ List<String> getAttributeNames();
+
+ /**
+  * @param key the name of an attribute that may belong to this annotation
+  * @return the attribute if this annotation has it, otherwise null
+  */
+ @Deprecated
+ Attribute getAttribute( String key );
+
+ /**
+  * @param attributeType the type of attribute that may belong to this annotation
+  * @return the attribute if this annotation has it, otherwise null
+  */
+ Attribute getAttribute( AttributeType attributeType );
+
+ /**
+  * @return all the attribute types registered for this annotation
+  */
+ Collection<AttributeType> getAttributeTypes();
+
+ /**
+  * @return all attributes for this Annotation
+  */
+ Collection<Attribute> getAttributes();
+
+ /**
+  * @param key the name of an attribute that may belong to this annotation
+  * @return true if this annotation has an attribute with the given name
+  */
+ boolean hasAttribute( String key );
+
+ /**
+  * @param attributeType the type of attribute that may belong to this annotation
+  * @return true if this annotation has an attribute of the given type
+  */
+ boolean hasAttribute( AttributeType attributeType );
+
+ /**
+  * @param key the name of an attribute that may belong to this annotation
+  * @return the value of the attribute if this annotation has it, otherwise null
+  */
+ String getAttributeValue( String key );
+
+ /**
+  * @param attributeType the type of attribute that may belong to this annotation
+  * @return the value of the attribute if this annotation has it, otherwise null
+  */
+ String getAttributeValue( AttributeType attributeType );
+
+ /**
+  * @return the name of the person or thing that identified this annotation in text
+  */
+ @Deprecated
+ String getAnnotatorName();
+
+ /**
+  * @param annotation some other annotation to test for equal values
+  * @return true if the annotations represent the same data - not including annotator name, annotation id, etc.
+  */
+ @Deprecated
+ boolean areValuesEqual( Annotation annotation );
+
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationInfoPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationInfoPrinter.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationInfoPrinter.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationInfoPrinter.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,284 @@
+//package org.chboston.cnlp.nlp.annotation.annotation;
+//
+//import org.chboston.cnlp.knowtator.annotation.parser.KnowtatorXmlParser;
+//import org.chboston.cnlp.nlp.annotation.attribute.Attribute;
+//import org.chboston.cnlp.nlp.annotation.entity.Entity;
+//import org.chboston.cnlp.nlp.annotation.event.Event;
+//import org.chboston.cnlp.nlp.annotation.relation.Relation;
+//import org.chboston.cnlp.nlp.annotation.textspan.DefaultTextSpan;
+//import org.chboston.cnlp.nlp.annotation.textspan.MultipleTextSpan;
+//import org.chboston.cnlp.nlp.annotation.textspan.TextSpan;
+//import org.chboston.cnlp.nlp.annotation.timex.Timex;
+//import org.chboston.cnlp.timeline.chronic.parser.AtParser;
+//import org.chboston.cnlp.timeline.gui.umlsevent.DefaultUmlsEvent;
+//import org.chboston.cnlp.timeline.gui.umlsevent.UmlsEvent;
+//import org.chboston.cnlp.timeline.timespan.DefaultTimeSpan;
+//import org.chboston.cnlp.timeline.timespan.TimeSpan;
+//
+//import java.io.*;
+//import java.util.*;
+//
+///**
+//* Author: SPF
+//* Affiliation: CHIP-NLP
+//* Date: 10/27/12
+//*/
+//public class AnnotationInfoPrinter {
+//
+// static private final String XML_UMLS_FILE_PATH
+// = "C:\\Spiffy\\Data\\THYME2\\Gold\\UMLS\\set08\\ID008_clinic_024.knowtator.xml";
+// static private final String XML_THYME_FILE_PATH
+// = "C:\\Spiffy\\Data\\THYME2\\Gold\\THYME\\Set08\\ID008_clinic_024.knowtator.xml";
+//
+// static private final String TXT_COREF_FILE_PATH
+// = "C:\\Spiffy\\Data\\THYME2\\Gold\\THYME_Chains\\Set08\\ID008_clinic_024.chains";
+//
+//
+// static private Collection<Collection<TextSpan>> getCoreference() {
+// Collection<Collection<TextSpan>> superSet = new HashSet<Collection<TextSpan>>();
+// try {
+// final BufferedReader reader = new BufferedReader( new FileReader( TXT_COREF_FILE_PATH ) );
+// while ( true ) {
+// final String line = reader.readLine();
+// if ( line == null ) {
+// break;
+// }
+// if ( !line.isEmpty() ) {
+// final String[] spanTexts = line.split("\\s+");
+// final Collection<TextSpan> spanSet = new HashSet<TextSpan>();
+// for ( String spanText : spanTexts ) {
+// final int index = spanText.indexOf('-');
+// if ( index <= 0 || index >= spanText.length()-1 ) {
+// continue;
+// }
+// final String startText = spanText.substring(0, index );
+// final String endText = spanText.substring( index+1 );
+// try {
+// final int start = Integer.parseInt( startText );
+// final int end = Integer.parseInt( endText );
+// spanSet.add( new DefaultTextSpan(start,end) );
+// } catch ( NumberFormatException nfE ) {
+// System.out.println(nfE.getMessage());
+// }
+// }
+// if ( !spanSet.isEmpty() ) {
+// superSet.add(spanSet);
+// }
+// }
+// }
+// reader.close();
+// } catch ( IOException ioE ) {
+// System.out.println( ioE.getMessage() );
+// }
+// return superSet;
+// }
+//
+// static private Collection<Entity> getUmls() {
+// final KnowtatorXmlParser xmlParser = new KnowtatorXmlParser();
+// xmlParser.parseFile( XML_UMLS_FILE_PATH );
+// return xmlParser.getAnnotationCollection().getNamedEntities();
+// }
+//
+// static private Collection<UmlsEvent> getUmlsEvents() {
+// final KnowtatorXmlParser xmlParser = new KnowtatorXmlParser();
+// xmlParser.parseFile( XML_THYME_FILE_PATH );
+// final AnnotationCollection annotationCollection = xmlParser.getAnnotationCollection();
+//
+// final Set<String> timexTexts = new HashSet<String>();
+// final Collection<Timex> times = annotationCollection.getTimes();
+// for ( Timex time : times ) {
+// timexTexts.add( time.getSpannedTextRepresentation() );
+//// System.out.println( "Timex " + time.getSpannedTextRepresentation() );
+//// final List<String> attributeNames = time.getAttributeNames();
+//// for ( String name : attributeNames ) {
+//// final Attribute attribute = time.getAttribute( name );
+//// System.out.println( " " + name + " = " + attribute.getValue() );
+//// }
+// }
+// int i=0;
+// for ( String timexText : timexTexts ) {
+// i++;
+// System.out.println( i + " " + timexText );
+// }
+//
+// // final Timex docTimeDate = AtParser.getDocTimex( annotationCollection.getNamedEntities(),
+//// annotationCollection.getTimes() );
+//// final Calendar docTime = AtParser.getTimexCalendar( docTimeDate );
+//// final TimeSpan timeSpan = new DefaultTimeSpan( docTime, docTime );
+// final Collection<Event> events = annotationCollection.getEvents();
+//
+//
+// final Collection<Entity> entities = getUmls();
+//
+// final Set<UmlsEvent> umlsEventSet = new HashSet<UmlsEvent>( events.size() );
+// for ( Event event : events ) {
+// final Set<Entity> umlsSet = new HashSet<Entity>();
+// final TextSpan textSpan = event.getTextSpan();
+// for ( Entity umls : entities ) {
+// final TextSpan overlapSpan = textSpan.getIntersectionSpan( umls.getTextSpan() );
+// if ( overlapSpan != null && overlapSpan.getLength() > 0 ) {
+// umlsSet.add( umls );
+// }
+// }
+//// UmlsEvent.TIMEX_REL timexRel = UmlsEvent.TIMEX_REL.UNKNOWN;
+//// final String docRelTime = AtParser.getDocTimeRelText( event );
+//// if ( docRelTime.equals( "BEFORE" ) ) {
+//// timexRel = UmlsEvent.TIMEX_REL.BEFORE;
+//// } else if ( docRelTime.equals( "BEFORE/OVERLAP" ) ) {
+//// timexRel = UmlsEvent.TIMEX_REL.BEFORE_OVERLAP;
+//// } else if ( docRelTime.equals( "OVERLAP" ) ) {
+//// timexRel = UmlsEvent.TIMEX_REL.OVERLAP;
+//// } else if ( docRelTime.equals( "AFTER" ) ) {
+//// timexRel = UmlsEvent.TIMEX_REL.AFTER;
+//// }
+//// final UmlsEvent umlsEvent = new DefaultUmlsEvent( (Event)event, timeSpan, timexRel, umlsSet );
+// final Collection<Relation> timeRelations = annotationCollection.getTimeRelations();
+// final Set<Relation> relationSet = new HashSet<Relation>();
+// for ( Relation relation : timeRelations ) {
+// final TextSpan overlapSpan = textSpan.getIntersectionSpan( relation.getTextSpan() );
+// if ( overlapSpan != null && overlapSpan.getLength() > 0 ) {
+// relationSet.add( relation );
+// }
+// }
+//
+// final UmlsEvent umlsEvent = new DefaultUmlsEvent( event, umlsSet, relationSet );
+// umlsEventSet.add( umlsEvent );
+// }
+// return umlsEventSet;
+// }
+//
+//
+//// static private Collection<UmlsEvent> getUmlsEvents2() {
+//// final KnowtatorXmlParser thymeXmlParser = new KnowtatorXmlParser();
+//// thymeXmlParser.parseFile( XML_THYME_FILE_PATH );
+//// final AnnotationCollection thymeAnnotationCollection = thymeXmlParser.getAnnotationCollection();
+//// final KnowtatorXmlParser umlsXmlParser = new KnowtatorXmlParser();
+//// umlsXmlParser.parseFile( XML_UMLS_FILE_PATH );
+//// final AnnotationCollection umlsAnnotationCollection = umlsXmlParser.getAnnotationCollection();
+////
+//// final List<AnnotationCollection> annotationCollectionList = new ArrayList<AnnotationCollection>(2);
+//// annotationCollectionList.add(thymeAnnotationCollection);
+//// annotationCollectionList.add(umlsAnnotationCollection);
+////
+//// final AnnotationCollection annotationCollection
+//// = new ImmutableAnnotationCollection.AnnoteCollectMerger().all( annotationCollectionList).build();
+////
+//// final Entity docTimeDate = AtParser.getDocTimex( annotationCollection.getNamedEntities(),
+//// annotationCollection.getTimes() );
+//// final Calendar docTime = AtParser.getTimexCalendar( docTimeDate );
+//// final TimeSpan timeSpan = new DefaultTimeSpan(docTime,docTime );
+//// final Collection<Entity> events = annotationCollection.getEvents();
+//// final Collection<Entity> entities = annotationCollection.getNamedEntities();
+////
+//// final Set<UmlsEvent> umlsEventSet = new HashSet<UmlsEvent>( events.size() );
+//// for ( Entity event : events ) {
+//// if ( !(event instanceof Event) ) {
+//// continue;
+//// }
+//// final Set<Entity> umlsSet = new HashSet<Entity>();
+//// final TextSpan textSpan = event.getTextSpan();
+//// for ( Entity umls : entities ) {
+//// final TextSpan overlapSpan = textSpan.getIntersectionSpan( umls.getTextSpan() );
+//// if ( overlapSpan != null && overlapSpan.getLength() > 0 ) {
+//// umlsSet.add( umls );
+//// }
+//// }
+//// UmlsEvent.TIMEX_REL timexRel = UmlsEvent.TIMEX_REL.UNKNOWN;
+//// final String docRelTime = AtParser.getDocTimeRelText( event );
+//// if ( docRelTime.equals( "BEFORE" ) ) {
+//// timexRel = UmlsEvent.TIMEX_REL.BEFORE;
+//// } else if ( docRelTime.equals( "BEFORE/OVERLAP" ) ) {
+//// timexRel = UmlsEvent.TIMEX_REL.BEFORE_OVERLAP;
+//// } else if ( docRelTime.equals( "OVERLAP" ) ) {
+//// timexRel = UmlsEvent.TIMEX_REL.OVERLAP;
+//// } else if ( docRelTime.equals( "AFTER" ) ) {
+//// timexRel = UmlsEvent.TIMEX_REL.AFTER;
+//// }
+//// final UmlsEvent umlsEvent = new DefaultUmlsEvent( (Event)event, timeSpan, timexRel, umlsSet );
+//// umlsEventSet.add( umlsEvent );
+//// }
+//// return umlsEventSet;
+//// }
+//
+//
+//
+//
+//// static private String getSpanText( final TextSpan textSpan ) {
+//// final StringBuilder sb = new StringBuilder();
+//// if ( textSpan instanceof MultipleTextSpan ) {
+//// final int spanCount = ((MultipleTextSpan)textSpan).getTextSpanCount();
+//// for ( int i=0; i<spanCount; i++ ) {
+//// sb.append( getSpanText(((MultipleTextSpan)textSpan).getTextSpan( i ))).append( ' ' );
+//// }
+//// } else {
+//// sb.append( textSpan.getStartIndex()).append( "-" ).append( textSpan.getEndIndex() );
+//// }
+//// return sb.toString().trim();
+//// }
+//
+// public static void main( String[] args ) {
+// final Collection<UmlsEvent> umlsEvents = getUmlsEvents();
+// Set<UmlsEvent> umlsEventSet = new HashSet<UmlsEvent>(umlsEvents);
+// for ( UmlsEvent umlsEvent : umlsEvents ) {
+// if ( umlsEvent.getAllUmls().isEmpty() ) {
+// System.out.println("No UMLS Entries : " + umlsEvent );
+// umlsEventSet.remove(umlsEvent);
+// }
+// }
+// for ( UmlsEvent umlsEvent : umlsEvents ) {
+// final Collection<Entity> umlsSet = umlsEvent.getAllUmls();
+// if ( umlsSet.size() == 1 ) {
+// for ( Entity umls : umlsSet ) {
+// final List<String> attributeNameList = umls.getAttributeNames();
+// if ( attributeNameList.contains("NEGATION") ) {
+// System.out.println("Negation Event : " + umlsEvent );
+// umlsEventSet.remove( umlsEvent );
+// }
+// }
+// }
+// }
+// for ( UmlsEvent umlsEvent : umlsEventSet ) {
+// System.out.println( umlsEvent );
+// final List<String> eventAttNames = umlsEvent.getAttributeNames();
+// for ( String name : eventAttNames ) {
+// System.out.println( "\t " + name + " : " + umlsEvent.getAttribute(name).getValue() );
+// }
+// final Collection<Entity> umlsSet = umlsEvent.getAllUmls();
+// for ( Entity umls : umlsSet ) {
+// System.out.println( "\t UMLS Entity: " + umls.getSpannedTextRepresentation() + " = " + umls.getClassType().getName() );
+// final List<String> attributeNames = umls.getAttributeNames();
+// for ( String name : attributeNames ) {
+// System.out.println( "\t\t "+name+" : " + umls.getAttribute(name) );
+// }
+// }
+// final Collection<Relation> timeRelations = umlsEvent.getTimeRelations();
+// for ( Relation relation : timeRelations ) {
+// System.out.println( "\t Time Relation: " + relation.getSpannedTextRepresentation() + " = " + relation.getClassType().getName() );
+// final List<String> attributeNames = relation.getAttributeNames();
+// for ( String name : attributeNames ) {
+// System.out.println( "\t\t "+name+" : " + relation.getAttribute(name) );
+// }
+// }
+// System.out.println();
+// }
+// final Collection<Collection<TextSpan>> corefSet = getCoreference();
+// for ( Collection<TextSpan> spanSet : corefSet ) {
+// final StringBuilder sb = new StringBuilder();
+// TextSpan lastTextSpan = null;
+// for ( TextSpan textSpan : spanSet ) {
+// for ( UmlsEvent umlsEvent : umlsEventSet ) {
+// final TextSpan umlsTextSpan = umlsEvent.getTextSpan();
+// if ( umlsTextSpan.equals( lastTextSpan ) ) {
+// continue;
+// }
+// if ( !umlsTextSpan.getIntersectionSpan( textSpan ).equals( TextSpan.NULL_TEXT_SPAN.INSTANCE ) ) {
+// sb.append( umlsEvent.getSpannedTextRepresentation() ).append(" - " );
+// lastTextSpan = umlsTextSpan;
+// }
+// }
+// }
+// System.out.println( sb.toString() );
+// }
+// }
+//
+//}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationSpanComparator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationSpanComparator.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationSpanComparator.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationSpanComparator.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,30 @@
+package org.chboston.cnlp.nlp.annotation.annotation;
+
+import org.chboston.cnlp.nlp.annotation.textspan.TextSpanComparator;
+
+import java.util.Comparator;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 6/29/12
+ */
+public enum AnnotationSpanComparator implements Comparator<Annotation> {
+   INSTANCE;
+
+   /** @return the singleton comparator, identical to {@link #INSTANCE} */
+   static public AnnotationSpanComparator getInstance() {
+      return INSTANCE;
+   }
+
+   /**
+    * Orders two annotations by handing their text spans to {@link TextSpanComparator}.
+    * @param annotation1 first annotation; its text span is the left operand
+    * @param annotation2 second annotation; its text span is the right operand
+    * @return the {@link TextSpanComparator} result for the two spans (originally documented as start-index, then end-index, difference)
+    */
+   @Override
+   public int compare( final Annotation annotation1, final Annotation annotation2 ) {
+      return TextSpanComparator.INSTANCE.compare( annotation1.getTextSpan(), annotation2.getTextSpan() );
+   }
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationTextComparator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationTextComparator.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationTextComparator.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/nlp/annotation/annotation/AnnotationTextComparator.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,28 @@
+package org.chboston.cnlp.nlp.annotation.annotation;
+
+import java.util.Comparator;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 8/31/12
+ */
+public enum AnnotationTextComparator implements Comparator<Annotation> {
+   INSTANCE;
+
+   /** @return the singleton comparator, identical to {@link #INSTANCE} */
+   static public AnnotationTextComparator getInstance() {
+      return INSTANCE;
+   }
+
+   /**
+    * Orders two annotations by their spanned text, ignoring case (not by text span position).
+    * @param annotation1 first annotation; its spanned text is the left operand
+    * @param annotation2 second annotation; its spanned text is the right operand
+    * @return a negative, zero or positive int per {@link String#CASE_INSENSITIVE_ORDER} applied to the two spanned texts
+    */
+   @Override
+   public int compare( final Annotation annotation1, final Annotation annotation2 ) {
+      return String.CASE_INSENSITIVE_ORDER.compare( annotation1.getSpannedText(), annotation2.getSpannedText() );
+   }
+}