You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2015/02/19 19:06:17 UTC
svn commit: r1660963 [19/19] - in /ctakes/sandbox/timelanes: META-INF/ edu/
edu/mayo/ edu/mayo/bmi/ edu/mayo/bmi/annotation/
edu/mayo/bmi/annotation/knowtator/ org/ org/chboston/ org/chboston/cnlp/
org/chboston/cnlp/anafora/ org/chboston/cnlp/anafora/a...
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlus.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlus.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlus.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlus.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,181 @@
+package org.chboston.cnlp.timeline.timespan.plus;
+
+import net.jcip.annotations.Immutable;
+import org.chboston.cnlp.timeline.timespan.AbstractTimeSpan;
+import org.chboston.cnlp.timeline.timespan.EndPointer;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 7/30/13
+ */
+@Immutable
+final public class TimeSpanPlus extends AbstractTimeSpan implements PointedTimeSpan {
+
+ static public final TimeSpanPlus UNKNOWN_TIMESPAN_PLUS
+ = new TimeSpanPlus( TimeEndPoint.NULL_END_POINT, TimeEndPoint.NULL_END_POINT );
+
+
+ final private TimeEndPoint _startTime;
+ final private TimeEndPoint _stopTime;
+
+ public TimeSpanPlus( final TimeEndPoint startTime, final TimeEndPoint stopTime ) {
+ if ( startTime.getMillis() > stopTime.getMillis() ) {
+ // Hopefully this never happens ...
+ _startTime = new TimeEndPoint( EndPointer.OVERLAP, stopTime.getMillis(), stopTime.isFuzzy() );
+ _stopTime = new TimeEndPoint( EndPointer.OVERLAP, startTime.getMillis(), startTime.isFuzzy() );
+ return;
+ }
+ _startTime = startTime;
+ _stopTime = stopTime;
+ }
+
+ public TimeEndPoint getStartTime() {
+ return _startTime;
+ }
+
+ public TimeEndPoint getStopTime() {
+ return _stopTime;
+ }
+
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public long getStartMillis() {
+ return _startTime.getMillis();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public long getStopMillis() {
+ return _stopTime.getMillis();
+ }
+
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public boolean isFuzzyDate() {
+ return _startTime.isFuzzy() || _stopTime.isFuzzy();
+ }
+
+ public String getRelationText() {
+ if ( this.equals( UNKNOWN_TIMESPAN_PLUS ) ) {
+ return "Unknown";
+ }
+ final EndPointer startPointer = _startTime.getPointer();
+ final EndPointer stopPointer = _stopTime.getPointer();
+ String prefix = null;
+ if ( isSingleDate() ) {
+ if ( startPointer == EndPointer.BEFORE
+ && stopPointer == EndPointer.BEFORE ) {
+ prefix = "Occurs Before";
+ } else if ( startPointer == EndPointer.AFTER
+ && stopPointer == EndPointer.AFTER ) {
+ prefix = "Occurs After";
+ } else if ( startPointer == EndPointer.BEFORE
+ && stopPointer == EndPointer.EQUAL ) {
+ prefix = "Ends on";
+ } else if ( startPointer == EndPointer.EQUAL
+ && stopPointer == EndPointer.AFTER ) {
+ prefix = "Begins on";
+ }
+ }
+ if ( prefix == null || prefix.isEmpty() ) {
+ if ( startPointer == EndPointer.BEFORE
+ && stopPointer == EndPointer.BEFORE ) {
+ prefix = "Starts before, ends within";
+ } else if ( startPointer == EndPointer.BEFORE
+ && stopPointer == EndPointer.EQUAL ) {
+ prefix = "Starts before, ends with";
+ } else if ( startPointer == EndPointer.BEFORE
+ && stopPointer == EndPointer.OVERLAP ) {
+ prefix = "Starts before, overlaps";
+ } else if ( startPointer == EndPointer.BEFORE
+ && stopPointer == EndPointer.AFTER ) {
+ prefix = "Starts before, ends after";
+
+ } else if ( startPointer == EndPointer.AFTER
+ && stopPointer == EndPointer.BEFORE ) {
+ prefix = "Is Within";
+ } else if ( startPointer == EndPointer.AFTER
+ && stopPointer == EndPointer.EQUAL ) {
+ prefix = "Starts within, ends with";
+ } else if ( startPointer == EndPointer.AFTER
+ && stopPointer == EndPointer.OVERLAP ) {
+ prefix = "Starts within, overlaps";
+ } else if ( startPointer == EndPointer.AFTER
+ && stopPointer == EndPointer.AFTER ) {
+ prefix = "Starts within, ends after";
+
+ } else if ( startPointer == EndPointer.EQUAL
+ && stopPointer == EndPointer.BEFORE ) {
+ prefix = "Starts with, ends before";
+ } else if ( startPointer == EndPointer.EQUAL
+ && stopPointer == EndPointer.EQUAL ) {
+ prefix = "Starts with, ends with";
+ } else if ( startPointer == EndPointer.EQUAL
+ && stopPointer == EndPointer.OVERLAP ) {
+ prefix = "Starts with, overlaps";
+ } else if ( startPointer == EndPointer.EQUAL
+ && stopPointer == EndPointer.AFTER ) {
+ prefix = "Starts with, ends after";
+
+ } else if ( startPointer == EndPointer.OVERLAP
+ && stopPointer == EndPointer.BEFORE ) {
+ prefix = "Overlaps, ends within";
+ } else if ( startPointer == EndPointer.OVERLAP
+ && stopPointer == EndPointer.EQUAL ) {
+ prefix = "Overlaps, ends with";
+ } else if ( startPointer == EndPointer.OVERLAP
+ && stopPointer == EndPointer.OVERLAP ) {
+ prefix = "Overlaps";
+ } else if ( startPointer == EndPointer.OVERLAP
+ && stopPointer == EndPointer.AFTER ) {
+ prefix = "Overlaps, ends after";
+ }
+ }
+ return prefix;
+ }
+
+ public String getSpanText() {
+ if ( this.equals( UNKNOWN_TIMESPAN_PLUS ) ) {
+ return "";
+ }
+ return super.toString();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public String toString() {
+ return getRelationText() + " " + getSpanText();
+ }
+
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int hashCode() {
+ return _startTime.hashCode() + 3 * _stopTime.hashCode();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public boolean equals( final Object object ) {
+ return object instanceof TimeSpanPlus
+ && ((TimeSpanPlus)object)._startTime.equals( _startTime )
+ && ((TimeSpanPlus)object)._stopTime.equals( _stopTime );
+ }
+
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlusComparator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlusComparator.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlusComparator.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlusComparator.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,56 @@
+package org.chboston.cnlp.timeline.timespan.plus;
+
+
+import java.util.Comparator;
+
+/**
+ * Orders {@link PointedTimeSpan}s by their start end point and, for equal starts, by their stop
+ * end point.  Two end points are compared by millisecond value; equal milliseconds fall back to the
+ * order value of their pointers.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 8/2/13
+ */
+public enum TimeSpanPlusComparator implements Comparator<PointedTimeSpan> {
+   INSTANCE;
+
+   /**
+    * @return the singleton comparator
+    */
+   static public TimeSpanPlusComparator getInstance() {
+      return INSTANCE;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int compare( final PointedTimeSpan timeSpan1, final PointedTimeSpan timeSpan2 ) {
+      final int startComparison = TimeEndPointComparator.getInstance().compare( timeSpan1.getStartTime(),
+                                                                                 timeSpan2.getStartTime() );
+      if ( startComparison != 0 ) {
+         return startComparison;
+      }
+      return TimeEndPointComparator.getInstance().compare( timeSpan1.getStopTime(), timeSpan2.getStopTime() );
+   }
+
+   /**
+    * Compares single end points: milliseconds first, pointer order second.
+    */
+   static private enum TimeEndPointComparator implements Comparator<TimeEndPoint> {
+      INSTANCE;
+
+      static public TimeEndPointComparator getInstance() {
+         return INSTANCE;
+      }
+
+      /**
+       * {@inheritDoc}
+       */
+      @Override
+      public int compare( final TimeEndPoint endPoint1, final TimeEndPoint endPoint2 ) {
+         final int millisComparison = Long.compare( endPoint1.getMillis(), endPoint2.getMillis() );
+         if ( millisComparison != 0 ) {
+            return millisComparison;
+         }
+         // Integer.compare rather than subtraction: a difference of two ints can overflow and flip sign.
+         return Integer.compare( endPoint1.getPointer().getOrder(), endPoint2.getPointer().getOrder() );
+      }
+
+   }
+
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/todo.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/todo.txt?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/todo.txt (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/todo.txt Thu Feb 19 18:06:13 2015
@@ -0,0 +1,27 @@
+ok 1. UmlsEvent to use RelativeTimeSpan instead of its own TimexRel enum
+1. Create UmlsEvent by Relation parse with RelativeTimeSpan
+2. Modify TimeSpanRenderer Before and After
+3. Get rid of Linked Scrollers
+4. Add to Semantic Type collection on Search
+5. Add remove button to left of each event lane
+
+4. Add button to expand / collapse semantic type
+
+10. Fix update on Header for Events
+
+
+
+
+
+X?- Colors on dates for UMLS types
+X?- Add I2B2 Color Scheme
+- Get Semantic Types for Events
+- Cull / Combine by coreference
+- "Lifeline Date" header listing dates w/o overlap that change with zoom
+- "&" and "|" searches
+?- Sort by "criticality" of event
+- Link timexRel events across timespans : AFTER> - - - <BEFORE
+?- Mark Permanence of Disease
+- Improve zoom bar labels (1x 2x ... 100% 200% ...)
+- Fix VerticalMimicPanel - resize updates not being painted properly
+
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/CuiPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/CuiPrinter.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/CuiPrinter.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/CuiPrinter.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,97 @@
+package org.chboston.cnlp.timeline.util;
+
+import org.chboston.cnlp.anafora.annotation.parser.AnaforaXmlParser;
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStore;
+import org.chboston.cnlp.nlp.annotation.attribute.DefinedAttributeType;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.parser.AnnotationsParser;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.logging.Logger;
+
+/**
+ * Command-line utility.  For every sub-directory of an input directory it picks one UMLS-Entity
+ * Anafora xml file, parses it, and writes one line per named entity (span, CUI, TUI, text) to a
+ * ".out" file in the output directory.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/19/2014
+ */
+final public class CuiPrinter {
+
+   static private final Logger LOGGER = Logger.getLogger( "CuiPrinter" );
+
+   // Locations used when no command-line arguments are given.
+   static private final String DEFAULT_INPUT_PARENT_PATH
+         = "C:\\Spiffy\\prj_thyme\\data\\internal\\annotations\\release_gold\\ColonCancer";
+   static private final String DEFAULT_OUTPUT_DIR_PATH
+         = "C:\\Spiffy\\prj_thyme\\output\\temp\\release_gold_cuis";
+
+   private CuiPrinter() {}
+
+
+   /**
+    * Writes one line per named entity, followed by the word count and the entity count.
+    * A missing or empty CUI / TUI is written as "UNKNOWN".  An IOException is logged, not thrown.
+    *
+    * @param outputFile      file to (over)write
+    * @param annotationStore source of the named entities and the word count
+    */
+   static private void writeCuis( final File outputFile, final AnnotationStore annotationStore ) {
+      final Collection<Entity> entities = annotationStore.getNamedEntities();
+      try ( Writer writer = new BufferedWriter( new FileWriter( outputFile ) ) ) {
+         for ( Entity entity : entities ) {
+            String cui = entity.getAttributeValue( DefinedAttributeType.CUI );
+            if ( cui == null || cui.isEmpty() ) {
+               cui = "UNKNOWN";
+            }
+            String tui = entity.getAttributeValue( DefinedAttributeType.TUI );
+            if ( tui == null || tui.isEmpty() ) {
+               tui = "UNKNOWN";
+            }
+            writer.write( entity.getTextSpan().getStartIndex() + "," + entity.getTextSpan().getEndIndex()
+                          + " " + cui + "_" + tui + " " + entity.getSpannedText() + "\n" );
+         }
+         writer.write( "Total Words: " + annotationStore.getWordCount() + "\n");
+         writer.write( "Total Annotations: " + entities.size() + "\n" );
+      } catch (IOException ioE ) {
+         LOGGER.severe( ioE.getMessage() );
+      }
+   }
+
+
+   /**
+    * Chooses an entity xml in the input directory and writes its CUIs to the output directory.
+    * A file ending in ".UMLS-Entity.gold.completed.xml" wins immediately; otherwise the largest
+    * file whose name contains ".UMLS-Entity" is used.  Nothing happens if no such file exists or
+    * the input cannot be listed.
+    *
+    * @param inputDir  directory holding the xml files and a text file named after the directory
+    * @param outputDir directory receiving the ".out" file
+    */
+   static private void copyGoldEntityXmls( final File inputDir, final File outputDir ) {
+      final String[] fileNames = inputDir.list();
+      if ( fileNames == null ) {
+         return;
+      }
+      File bestXml = null;
+      long longestLength = 0;
+      for ( String fileName : fileNames ) {
+         if ( fileName.endsWith( ".UMLS-Entity.gold.completed.xml" ) ) {
+            bestXml = new File( inputDir, fileName );
+            break;
+         }
+         if ( fileName.contains( ".UMLS-Entity" ) ) {
+            final File entityXml = new File( inputDir, fileName );
+            if ( entityXml.length() > longestLength ) {
+               bestXml = entityXml;
+               longestLength = entityXml.length();
+            }
+         }
+      }
+      if ( bestXml == null ) {
+         return;
+      }
+      final AnnotationsParser parser = new AnaforaXmlParser();
+      parser.setDocumentTextFile( new File( inputDir, inputDir.getName() + ".txt" ) );
+      parser.parseFile( bestXml.getPath() );
+      final AnnotationStore annotationStore = parser.getAnnotationStore();
+      final File outputFile = new File( outputDir, bestXml.getName() + ".out" );
+      writeCuis( outputFile, annotationStore );
+   }
+
+
+   /**
+    * @param args optional: [0] parent directory of the per-document input directories,
+    *             [1] output directory.  Built-in default paths are used for missing arguments.
+    */
+   public static void main( String... args ) {
+      final String inputParentPath = args.length > 0 ? args[ 0 ] : DEFAULT_INPUT_PARENT_PATH;
+      final String outputDirPath = args.length > 1 ? args[ 1 ] : DEFAULT_OUTPUT_DIR_PATH;
+      final File outputDir = new File( outputDirPath );
+      // Without the output directory every FileWriter below would fail.
+      if ( !outputDir.isDirectory() && !outputDir.mkdirs() ) {
+         LOGGER.severe( "Could not create output directory " + outputDir.getPath() );
+         return;
+      }
+      final File inputParentDir = new File( inputParentPath );
+      final File[] inputDirs = inputParentDir.listFiles();
+      // listFiles() returns null when the path is missing, unreadable or not a directory.
+      if ( inputDirs == null ) {
+         LOGGER.severe( "Cannot list input directory " + inputParentDir.getPath() );
+         return;
+      }
+      for ( File inputDir : inputDirs ) {
+         copyGoldEntityXmls( inputDir, outputDir );
+      }
+   }
+
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/GoldSerializer.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/GoldSerializer.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/GoldSerializer.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/GoldSerializer.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,149 @@
+package org.chboston.cnlp.timeline.util;
+
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStore;
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStoreFactory;
+import org.chboston.cnlp.timeline.gui.qaclipper.TimelineAnaforaWriter5;
+import org.chboston.cnlp.timeline.timeline.Timeline;
+import org.chboston.cnlp.timeline.timeline.TimelineFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.Collection;
+import java.util.HashSet;
+
+/**
+ * Developer utility that builds timelines from annotation stores and writes them with
+ * {@link TimelineAnaforaWriter5}.  Also holds two helpers that arrange files into
+ * per-document directories.  {@link #main(String[])} selects which step runs.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 8/15/14
+ */
+final public class GoldSerializer {
+
+   private GoldSerializer() {
+   }
+
+   static private final File ROOT_DIR = new File( "C:/Spiffy/Data/IAA/THYME5/IaaInput_ColonCancer/a_ux" );
+   static private final File IAA_IN_DIR = new File( "C:/Spiffy/Data/IAA/THYME5/IaaInput_ColonCancer/a_gold_ux" );
+
+   static private final String SERIALIZATIONS = "C:/Spiffy/Output/Timeline/Serialized/Gold/ColonCancer";
+
+   //   static private final File ROOT_IN_DIR = new File( "C:\\Spiffy\\prj_thyme\\data\\external\\extrinsic\\ColonCancer" );
+//   static private final File ROOT_OUT_DIR = new File( "C:\\Spiffy\\prj_thyme\\output\\permanent\\extrinsic\\ColonCancer" );
+   static private final File ROOT_IN_DIR
+         = new File( "C:\\Spiffy\\prj_thyme\\data\\internal\\corpus\\colon_cancer\\processed_test" );
+   static private final File ROOT_OUT_DIR
+         = new File( "C:\\Spiffy\\prj_thyme\\output\\permanent\\extrinsic\\colon_cancer\\from_xmi" );
+
+
+   /**
+    * Runs the currently enabled step; the other steps are left commented out for manual switching.
+    *
+    * @param args ignored
+    */
+   public static void main( final String[] args ) {
+//      makeCopies();
+//      copyNotes();
+//      serializeTimelines();
+//      serializeTimelines( ROOT_IN_DIR, ROOT_OUT_DIR );
+      serializeXmiTimelines( ROOT_IN_DIR, ROOT_OUT_DIR );
+   }
+
+   /**
+    * For each entry of the input directory, builds an annotation store from the entry and the
+    * text file named after it, and writes the timeline into a same-named output sub-directory.
+    * Entries without time relations are skipped.
+    *
+    * @param rootInDir  directory holding one sub-directory per document
+    * @param rootOutDir directory receiving one sub-directory per written timeline
+    */
+   static private void serializeTimelines( final File rootInDir, final File rootOutDir ) {
+      final File[] subDirs = rootInDir.listFiles();
+      if ( subDirs == null ) {
+         return;
+      }
+      for ( File subDir : subDirs ) {
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory
+               .createAnnotationCollection2( subDir, new File( subDir, subDir.getName() + ".txt" ) );
+         if ( annotationStore.getTimeRelations().isEmpty() ) {
+            continue;
+         }
+         System.out.println( "Serializing " + subDir.getName() );
+         final Timeline timeline = TimelineFactory.createTimeline( subDir.getName(), annotationStore );
+         final File outSubDir = new File( rootOutDir, subDir.getName() );
+         outSubDir.mkdirs();
+         TimelineAnaforaWriter5.writeTimeline( outSubDir.getPath() + "/" + outSubDir.getName(), timeline );
+      }
+   }
+
+   /**
+    * For each file of the input directory, builds an annotation store from the file and writes
+    * its timeline directly into the output directory.  Files without time relations are skipped.
+    *
+    * @param rootInDir  directory holding the input files
+    * @param rootOutDir directory receiving the timelines
+    */
+   static private void serializeXmiTimelines( final File rootInDir, final File rootOutDir ) {
+      final File[] xmiFiles = rootInDir.listFiles();
+      if ( xmiFiles == null ) {
+         return;
+      }
+      for ( File xmiFile : xmiFiles ) {
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory.createAnnotationCollection( xmiFile.getPath() );
+         if ( annotationStore.getTimeRelations().isEmpty() ) {
+            continue;
+         }
+         System.out.println( "Serializing " + xmiFile.getName() );
+         final Timeline timeline = TimelineFactory.createTimeline( xmiFile.getName(), annotationStore );
+         rootOutDir.mkdirs();
+         TimelineAnaforaWriter5.writeTimeline( rootOutDir.getPath() + "/" + xmiFile.getName(), timeline );
+      }
+   }
+
+
+   /**
+    * Same as {@link #serializeTimelines(File, File)} but reads from {@link #IAA_IN_DIR} and
+    * writes to paths under {@link #SERIALIZATIONS}.
+    */
+   static private void serializeTimelines() {
+      final File[] subDirs = IAA_IN_DIR.listFiles();
+      if ( subDirs == null ) {
+         return;
+      }
+      for ( File subDir : subDirs ) {
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory.createAnnotationCollection2( subDir.getPath(), subDir.getName() );
+         if ( annotationStore.getTimeRelations().isEmpty() ) {
+            continue;
+         }
+         System.out.println( "Serializing " + subDir.getName() );
+         final Timeline timeline = TimelineFactory.createTimeline( subDir.getName(), annotationStore );
+         final String outputPath = SERIALIZATIONS + "/" + subDir.getName();
+         TimelineAnaforaWriter5.writeTimeline( outputPath, timeline );
+      }
+   }
+
+
+   /**
+    * @param fileName simple file name
+    * @return the part of the name before its first '.', or the whole name if it has no '.'
+    */
+   static private String getSetName( final String fileName ) {
+      final int dotIndex = fileName.indexOf( '.' );
+      return dotIndex < 0 ? fileName : fileName.substring( 0, dotIndex );
+   }
+
+   /**
+    * Groups the files of {@link #ROOT_DIR} by the part of their name before the first '.', and
+    * copies each group into a directory of that name under {@link #IAA_IN_DIR}.
+    * Copy failures are printed to standard error and do not stop the run.
+    */
+   static private void makeCopies() {
+      final String[] fileNames = ROOT_DIR.list();
+      // list() returns null when ROOT_DIR is missing or not a directory.
+      if ( fileNames == null ) {
+         System.err.println( "Cannot list " + ROOT_DIR.getPath() );
+         return;
+      }
+      final Collection<String> setNames = new HashSet<>();
+      for ( String fileName : fileNames ) {
+         setNames.add( getSetName( fileName ) );
+      }
+      for ( String setName : setNames ) {
+         final File setDir = new File( IAA_IN_DIR, setName );
+         // mkdirs so that a missing IAA_IN_DIR is created as well.
+         setDir.mkdirs();
+         for ( String fileName : fileNames ) {
+            // Exact set-name match: a plain prefix test would also put "ID10.x" into set "ID1".
+            if ( !getSetName( fileName ).equals( setName ) ) {
+               continue;
+            }
+            final File inputFile = new File( ROOT_DIR, fileName );
+            final File outputFile = new File( setDir, fileName );
+            System.out.println( inputFile.getPath() + " > " + outputFile.getPath() );
+            try {
+               Files.copy( inputFile.toPath(), outputFile.toPath() );
+            } catch ( IOException ioE ) {
+               System.err.println( ioE.getMessage() );
+            }
+         }
+      }
+   }
+
+
+   /**
+    * For each sub-directory of the THYME5 ColonCancer directory, copies the file that carries the
+    * sub-directory's own name into the same-named directory under {@link #IAA_IN_DIR}.
+    * Missing notes are reported on standard output; copy failures on standard error.
+    */
+   static private void copyNotes() {
+      final File rootDir = new File( "C:/Spiffy/Data/IAA/THYME5/ColonCancer" );
+      final File[] subDirs = rootDir.listFiles();
+      // listFiles() returns null when rootDir is missing or not a directory.
+      if ( subDirs == null ) {
+         System.err.println( "Cannot list " + rootDir.getPath() );
+         return;
+      }
+      for ( File subDir : subDirs ) {
+         final File noteFile = new File( subDir, subDir.getName() );
+         if ( !noteFile.exists() ) {
+            System.out.println( "No note for " + subDir.getName() );
+            continue;
+         }
+         final File outputDir = new File( IAA_IN_DIR, subDir.getName() );
+         // Files.copy does not create parent directories.
+         outputDir.mkdirs();
+         final File outputFile = new File( outputDir, subDir.getName() );
+         try {
+            Files.copy( noteFile.toPath(), outputFile.toPath() );
+         } catch ( IOException ioE ) {
+            System.err.println( ioE.getMessage() );
+         }
+      }
+   }
+
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/SimpleStoreWriter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/SimpleStoreWriter.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/SimpleStoreWriter.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/SimpleStoreWriter.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,226 @@
+package org.chboston.cnlp.timeline.util;
+
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStore;
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStoreFactory;
+import org.chboston.cnlp.nlp.annotation.attribute.DefinedAttributeType;
+import org.chboston.cnlp.nlp.annotation.coreference.CoreferenceChain;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.relation.Relation;
+import org.chboston.cnlp.timeline.timeline.Timeline;
+import org.chboston.cnlp.timeline.timeline.TimelineFactory;
+import org.chboston.cnlp.timeline.timespan.plus.PointedTimeSpan;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.logging.FileHandler;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+import java.util.logging.SimpleFormatter;
+
+/**
+ * Developer utility that writes, for every annotation store found under an input directory, a
+ * plain-text listing of its entities, events, times, coreference chains, relations and time
+ * spans, and finally a summary table of the per-document counts.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/12/2015
+ */
+public class SimpleStoreWriter {
+
+   static private final Logger LOGGER = Logger.getLogger( "SimpleStoreWriter" );
+
+   // Layout shared by every annotation line: class type | text | start,end
+   static private final String LINE_FORMAT = "%1$20s | %2$30s | %3$3d,%4$3d";
+
+//   static private final File ROOT_IN_DIR
+//         = new File( "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\27Notes_Jan9_2015" );
+//   static private final File ROOT_OUT_DIR
+//         = new File( "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\27Notes_Jan9_2015\\simple2" );
+   // true: read every file as xmi;  false: read only the ".txt" files
+   static private final boolean XMI = false;
+   static private final File ROOT_IN_DIR
+//         = new File( "C:\\Spiffy\\prj_thyme\\data\\internal\\x_eval\\annotations\\layer\\aggregate\\ctakes\\colon\\test" );
+         = new File( "C:\\Spiffy\\prj_thyme\\data\\internal\\annotations\\release_gold\\ColonCancer" );
+   static private final File ROOT_OUT_DIR
+//         = new File( "C:\\Spiffy\\prj_thyme\\output\\temp\\x_eval\\annotations\\layer\\aggregate\\ctakes\\colon\\negTest" );
+         = new File( "C:\\Spiffy\\prj_thyme\\output\\temp\\x_eval\\annotations\\layer\\aggregate\\gold\\colon\\negTest" );
+
+   // Messages captured from the "TimeSpanFactory" and "TLinkTypeArray3" loggers, and running totals
+   // over all documents.
+   static private final Collection<String> _unparsedTimexList = new ArrayList<>();
+   static private int _entities;
+   static private int _events;
+   static private int _timex3s;
+   static private int _chains;
+   static private int _relations;
+   static private int _tlinks;
+   static private int _spans;
+
+
+   /**
+    * Processes the whole input tree, then writes "Summary.txt" (one count row per document plus
+    * totals) and "Summary.err.txt" (all captured logger messages) into the output directory.
+    *
+    * @param rootInDir  root of the input tree
+    * @param rootOutDir directory receiving every output file; created if necessary
+    */
+   static private void writeXmiStores( final File rootInDir, final File rootOutDir ) {
+      rootOutDir.mkdirs();
+      final StringBuilder sb = XMI ? runXmiDir( rootInDir, rootOutDir ) : runAnaforaDir( rootInDir, rootOutDir );
+      final String name = "Summary";
+      // The summaries belong in the directory that was passed in, next to the per-document files.
+      final File summaryFile = new File( rootOutDir, name + ".txt" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( summaryFile ) ) ) {
+         writer.write( String.format( "%1$40s NE Evnt Time Chn Rltn TLnk Span\n", name ) );
+         writer.write( sb.toString() );
+         writer.write( "\n" );
+         writer.write( String.format( "%1$40s %2$4d %3$4d %4$4d %5$4d %6$4d %7$4d %8$4d\n", "Total",
+               _entities, _events, _timex3s, _chains, _relations, _tlinks, _spans ) );
+      } catch ( IOException ioE ) {
+         LOGGER.severe( ioE.getMessage() );
+      }
+      final File errorSummaryFile = new File( rootOutDir, name + ".err.txt" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( errorSummaryFile ) ) ) {
+         for ( String unparsed : _unparsedTimexList ) {
+            writer.write( unparsed + "\n" );
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.severe( ioE.getMessage() );
+      }
+   }
+
+   /**
+    * Recursively processes every file below the given directory.
+    *
+    * @param xmiDir     directory to walk
+    * @param rootOutDir directory receiving the output files
+    * @return the concatenated count rows of all processed files
+    */
+   static private StringBuilder runXmiDir( final File xmiDir, final File rootOutDir ) {
+      final StringBuilder sb = new StringBuilder();
+      final File[] xmiFiles = xmiDir.listFiles();
+      if ( xmiFiles == null ) {
+         return sb;
+      }
+      for ( File xmiFile : xmiFiles ) {
+         if ( xmiFile.isDirectory() ) {
+            sb.append( runXmiDir( xmiFile, rootOutDir ) );
+            continue;
+         }
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory.createAnnotationCollection( xmiFile.getPath() );
+         final String countText = writeAnnotationStore( rootOutDir, xmiFile.getName(), annotationStore );
+         sb.append( countText );
+      }
+      return sb;
+   }
+
+   /**
+    * Recursively processes every file whose name ends in ".txt" below the given directory.
+    *
+    * @param anaforaDir directory to walk
+    * @param rootOutDir directory receiving the output files
+    * @return the concatenated count rows of all processed files
+    */
+   static private StringBuilder runAnaforaDir( final File anaforaDir, final File rootOutDir ) {
+      final StringBuilder sb = new StringBuilder();
+      final File[] anaforaFiles = anaforaDir.listFiles();
+      if ( anaforaFiles == null ) {
+         return sb;
+      }
+      for ( File anaforaFile : anaforaFiles ) {
+         if ( anaforaFile.isDirectory() ) {
+            sb.append( runAnaforaDir( anaforaFile, rootOutDir ) );
+            continue;
+         }
+         if ( !anaforaFile.getName().endsWith( ".txt" ) ) {
+            continue;
+         }
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory.createAnnotationCollection( anaforaFile.getPath() );
+         final String countText = writeAnnotationStore( rootOutDir, anaforaFile.getName(), annotationStore );
+         sb.append( countText );
+      }
+      return sb;
+   }
+
+
+   /**
+    * Writes one formatted line per entity.
+    *
+    * @param writer   destination
+    * @param entities entities to list
+    * @throws IOException if writing fails
+    */
+   static private void writeEntities( final Writer writer, final Collection<Entity> entities )
+         throws IOException {
+      for ( Entity entity : entities ) {
+         final String lineText = String.format( LINE_FORMAT,
+               entity.getClassType(), entity.getSpannedTextRepresentation(),
+               entity.getTextSpan().getStartIndex(), entity.getTextSpan().getEndIndex() );
+         writer.write( lineText + "\n" );
+      }
+   }
+
+   /**
+    * @return a formatter that emits the bare log message on its own line and also records it in
+    * the list that ends up in "Summary.err.txt"
+    */
+   static private SimpleFormatter createErrorFormatter() {
+      return new SimpleFormatter() {
+         @Override
+         public synchronized String format( final LogRecord record ) {
+            _unparsedTimexList.add( formatMessage( record ) );
+            return formatMessage( record ) + "\n";
+         }
+      };
+   }
+
+   /**
+    * Writes "&lt;name&gt;.simple.txt" for one annotation store and adds its counts to the running
+    * totals.  While the file is written, messages of the "TimeSpanFactory" and "TLinkTypeArray3"
+    * loggers are diverted into "&lt;name&gt;.error.txt".
+    *
+    * @param rootOutDir      directory receiving both files
+    * @param name            document name, used for the file names and the count row
+    * @param annotationStore annotations to list
+    * @return the count row for the summary table, or an empty string if an IOException occurred
+    */
+   static private String writeAnnotationStore( final File rootOutDir, final String name,
+                                               final AnnotationStore annotationStore ) {
+      final File outputFile = new File( rootOutDir, name + ".simple.txt" );
+      final File errorFile = new File( rootOutDir, name + ".error.txt" );
+      final Logger timeSpanFactoryLogger = Logger.getLogger( "TimeSpanFactory" );
+      final Logger tlinkCloserLogger = Logger.getLogger( "TLinkTypeArray3" );
+      LOGGER.info( "Writing Simple " + outputFile.getPath() );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( outputFile ) ) ) {
+         final FileHandler errorHandler = new FileHandler( errorFile.getPath() );
+         errorHandler.setFormatter( createErrorFormatter() );
+         timeSpanFactoryLogger.addHandler( errorHandler );
+         tlinkCloserLogger.addHandler( errorHandler );
+         try {
+            Collection<Entity> entities = annotationStore.getNamedEntities();
+            final int entityCount = entities.size();
+            _entities += entityCount;
+            writeEntities( writer, entities );
+
+            entities = annotationStore.getEvents();
+            final int eventCount = entities.size();
+            _events += eventCount;
+            writeEntities( writer, entities );
+
+            entities = annotationStore.getTimes();
+            final int timesCount = entities.size();
+            _timex3s += timesCount;
+            writeEntities( writer, entities );
+
+            final Collection<CoreferenceChain> chains = annotationStore.getCoreferenceChains();
+            int chainCount = 0;
+            for ( CoreferenceChain chain : chains ) {
+               // Only chains longer than one whose text representation contains " ... " are listed.
+               if ( chain.getChainLength() == 1 || !chain.getSpannedTextRepresentation().contains( " ... " ) ) {
+                  continue;
+               }
+               final String lineText = String.format( LINE_FORMAT,
+                     chain.getClassType(), chain.getSpannedTextRepresentation(),
+                     chain.getTextSpan().getStartIndex(), chain.getTextSpan().getEndIndex() );
+               writer.write( lineText + "\n" );
+               chainCount++;
+            }
+            _chains += chainCount;
+
+            Collection<Relation> relations = annotationStore.getUmlsRelations();
+            final int relationCount = relations.size();
+            _relations += relationCount;
+            for ( Relation relation : relations ) {
+               final String lineText = String.format( LINE_FORMAT,
+                     relation.getClassType(), relation.getSpannedTextRepresentation(),
+                     relation.getTextSpan().getStartIndex(), relation.getTextSpan().getEndIndex() );
+               writer.write( lineText + "\n" );
+            }
+
+            relations = annotationStore.getTimeRelations();
+            final int tlinkCount = relations.size();
+            _tlinks += tlinkCount;
+            for ( Relation relation : relations ) {
+               final String tlinkType = relation.getFirstEntity().getSpannedTextRepresentation()
+                                        + " " + relation.getAttributeValue( DefinedAttributeType.RELATION_TYPE )
+                                        + " " + relation.getSecondEntity().getSpannedTextRepresentation();
+               final String lineText = String.format( LINE_FORMAT,
+                     relation.getClassType(), tlinkType,
+                     relation.getTextSpan().getStartIndex(), relation.getTextSpan().getEndIndex() );
+               writer.write( lineText + "\n" );
+            }
+
+            final Timeline timeline = TimelineFactory.createTimeline( name, annotationStore );
+            _spans += timeline.getTimeSpans().size();
+            for ( PointedTimeSpan timeSpan : timeline ) {
+               writer.write( timeSpan + "\n" );
+            }
+            writer.write( "\n\n" );
+            writer.write( annotationStore.getDocumentText() );
+            writer.write( "\n\n" );
+            writer.write( " NE Evnt Time Chn Rltn TLnk Span\n" );
+            final String countText =
+                  String.format( "%1$4d %2$4d %3$4d %4$4d %5$4d %6$4d %7$4d\n",
+                        entityCount, eventCount, timesCount, chainCount,
+                        relationCount, tlinkCount, timeline.getTimeSpans().size() );
+            writer.write( countText + "\n" );
+            return String.format( "%1$40s ", name ) + countText;
+         } finally {
+            // Always detach and close, so a failure part-way through cannot leave the handler
+            // attached to the shared loggers or its file open.
+            timeSpanFactoryLogger.removeHandler( errorHandler );
+            tlinkCloserLogger.removeHandler( errorHandler );
+            errorHandler.flush();
+            errorHandler.close();
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.severe( ioE.getMessage() );
+      }
+      return "";
+   }
+
+   /**
+    * Runs the writer on {@link #ROOT_IN_DIR}, writing into {@link #ROOT_OUT_DIR}.
+    *
+    * @param args ignored
+    */
+   public static void main( final String... args ) {
+      writeXmiStores( ROOT_IN_DIR, ROOT_OUT_DIR );
+   }
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/XmiEolFixer.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/XmiEolFixer.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/XmiEolFixer.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/XmiEolFixer.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,75 @@
+package org.chboston.cnlp.xmi;
+
+import java.io.*;
+import java.util.logging.Logger;
+import java.util.regex.Pattern;
+
+/**
+ * Developer utility that copies every ".xmi" file of {@link #INPUT_DIR_PATH} to a ".xml" file in
+ * {@link #OUTPUT_DIR_PATH}, inserting a line break between each '&gt;' that is directly followed
+ * by a '&lt;'.  All other characters are copied unchanged.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/13/2014
+ */
+final public class XmiEolFixer {
+
+   static private final Logger LOGGER = Logger.getLogger( "XmiEolFixer" );
+
+   private XmiEolFixer() {
+   }
+
+   static private final String INPUT_DIR_PATH
+         = "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\27Notes_Jan9_2015";
+   static private final String OUTPUT_DIR_PATH
+         = "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\27Notes_Jan9_2015\\xml";
+
+
+   /**
+    * Converts all ".xmi" files of the input directory.  A failure on one file is logged and the
+    * remaining files are still processed.
+    *
+    * @param args ignored
+    */
+   public static void main( String[] args ) {
+      final File inputDir = new File( INPUT_DIR_PATH );
+      final File[] files = inputDir.listFiles();
+      if ( files == null ) {
+         LOGGER.warning( "No files in " + inputDir.getPath() );
+         return;
+      }
+      final File outputDir = new File( OUTPUT_DIR_PATH );
+      // FileWriter does not create missing directories.
+      if ( !outputDir.isDirectory() && !outputDir.mkdirs() ) {
+         LOGGER.severe( "Could not create " + outputDir.getPath() );
+         return;
+      }
+      for ( File file : files ) {
+         if ( !file.getName().endsWith( ".xmi" ) ) {
+            continue;
+         }
+         final String filePathOld = file.getPath();
+         final String filePathNew = OUTPUT_DIR_PATH + "/" + file.getName() + ".xml";
+         try ( Reader reader = new BufferedReader( new FileReader( filePathOld ) );
+               Writer writer = new BufferedWriter( new FileWriter( filePathNew ) ) ) {
+            breakAdjacentTags( reader, writer );
+         } catch ( IOException ioE ) {
+            LOGGER.severe( ioE.getMessage() );
+         }
+      }
+   }
+
+   /**
+    * Copies the reader to the writer, writing a newline between every '&gt;' and an immediately
+    * following '&lt;'.
+    *
+    * @param reader source text
+    * @param writer destination
+    * @throws IOException if reading or writing fails
+    */
+   static private void breakAdjacentTags( final Reader reader, final Writer writer ) throws IOException {
+      final char[] buffer = new char[ 1024 ];
+      final StringBuilder sb = new StringBuilder( buffer.length + 64 );
+      // Carried across reads so that a "><" pair split over two buffer fills is still detected.
+      boolean previousWasClose = false;
+      int length = reader.read( buffer );
+      while ( length != -1 ) {
+         sb.setLength( 0 );
+         for ( int i = 0; i < length; i++ ) {
+            final char c = buffer[ i ];
+            if ( previousWasClose && c == '<' ) {
+               sb.append( '\n' );
+            }
+            sb.append( c );
+            previousWasClose = c == '>';
+         }
+         writer.write( sb.toString() );
+         length = reader.read( buffer );
+      }
+   }
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/UimaXmiParser.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/UimaXmiParser.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/UimaXmiParser.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/UimaXmiParser.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,695 @@
+package org.chboston.cnlp.xmi.parser;
+
+import org.chboston.cnlp.nlp.annotation.annotation.store.ImmutableAnnotationStore;
+import org.chboston.cnlp.nlp.annotation.attribute.AttributeType;
+import org.chboston.cnlp.nlp.annotation.attribute.AttributeTypeFactory;
+import org.chboston.cnlp.nlp.annotation.attribute.DefaultAttribute;
+import org.chboston.cnlp.nlp.annotation.attribute.DefinedAttributeType;
+import org.chboston.cnlp.nlp.annotation.classtype.ClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.CustomClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.SemanticClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.TemporalClassType;
+import org.chboston.cnlp.nlp.annotation.coreference.CoreferenceChain;
+import org.chboston.cnlp.nlp.annotation.coreference.CoreferenceChainSpanComparator;
+import org.chboston.cnlp.nlp.annotation.coreference.CoreferenceFactory;
+import org.chboston.cnlp.nlp.annotation.entity.DefaultEntity;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.parser.AbstractAnnotationXmlParser;
+import org.chboston.cnlp.nlp.annotation.relation.DefaultRelation;
+import org.chboston.cnlp.nlp.annotation.relation.Relation;
+import org.chboston.cnlp.nlp.annotation.textspan.DefaultTextSpan;
+import org.chboston.cnlp.nlp.annotation.textspan.DiscontiguousTextSpan;
+import org.chboston.cnlp.nlp.annotation.textspan.TextSpan;
+import org.jdom.Attribute;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.logging.Logger;
+import java.util.regex.Pattern;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/13/2014
+ */
+final public class UimaXmiParser extends AbstractAnnotationXmlParser {
+
+ // Logger shared by every static and instance method of this parser.
+ static private final Logger LOGGER = Logger.getLogger( "UimaXmiParser" );
+
+ // TODO Refactor to use XmiTag
+ // Names of xml elements that are looked up directly under the document root.
+ // ROOT_ELEMENT_NAME is compared against the root element name in canParse.
+ static private final String ROOT_ELEMENT_NAME = "XMI";
+ // Element holding the document text (read through the DOCUMENT_TEXT attribute).
+ static private final String DOCUMENT_TEXT_NAME = "Sofa";
+ static private final String EVENT_MENTION = "EventMention";
+ static private final String TIME_MENTION = "TimeMention";
+ static private final String DATE_ANNOTATION = "DateAnnotation";
+ static private final String TLINK = "TemporalTextRelation";
+ // Same value as UMLS_CONCEPT below; not referenced elsewhere in this class.
+ static private final String UMLS = "UmlsConcept";
+
+ // Elements that are indexed by id in parseFile so that event mentions can pick up their properties.
+ static private final String EVENT = "Event";
+ static private final String EVENT_PROPERTIES = "EventProperties";
+
+ // Attribute on an EventMention that holds the id of its Event element.
+ static private final String EVENT_ID_KEY = "event";
+ // Attribute on an Event that holds the id of its EventProperties element.
+ static private final String PROPERTIES_ID_KEY = "properties";
+
+
+ // Element names of the named-entity mention types that are read by getEntityMap.
+ static private final String SIGN_SYMPTOM = "SignSymptomMention";
+ static private final String PROCEDURE = "ProcedureMention";
+ static private final String DISEASE = "DiseaseDisorderMention";
+ static private final String MEDICATION = "MedicationMention";
+ static private final String ANATOMIC_SITE = "AnatomicalSiteMention";
+
+ // Attribute listing whitespace-separated concept ids, and the element type those ids refer to.
+ static private final String CONCEPT_ARRAY = "ontologyConceptArr";
+ static private final String UMLS_CONCEPT = "UmlsConcept";
+
+ // Attribute (and one element) names read from individual elements.
+ static private final String ID = "id";
+ static private final String DOCUMENT_TEXT = "sofaString";
+ static private final String BEGIN = "begin";
+ static private final String END = "end";
+ static private final String DOC_TIME_REL = "docTimeRel";
+ static private final String POLARITY = "polarity";
+ static private final String LINK_TYPE = "category";
+ static private final String LINK_ARG1 = "arg1";
+ static private final String LINK_ARG2 = "arg2";
+ // Element type whose ARGUMENT attribute points at the entity that a relation argument stands for.
+ static private final String RELATION_ARG = "RelationArgument";
+ static private final String ARGUMENT = "argument";
+ static private final String CUI = "cui";
+ static private final String TUI = "tui";
+
+
+ // Element names for coreference information, and the attribute listing the members of a chain.
+ static private final String COREF_RELATION = "CoreferenceRelation";
+ static private final String COREF_CHAIN = "CollectionTextRelation";
+ static private final String COREF_MEMBERS = "members";
+
+
+ // Directory scanned by main for a test file; the path is specific to one developer machine.
+ static private final String INPUT_DIR_PATH = "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\Oct10_2014";
+
+ /**
+ * @param filePath path to file with annotation information
+ * @return true if this AnnotationsParser can properly handle the given file
+ */
+ static public boolean canParse( final String filePath ) {
+ final SAXBuilder saxBuilder = new SAXBuilder();
+ try {
+ final Document document = saxBuilder.build( filePath );
+ final Element rootElement = document.getRootElement();
+ return rootElement != null && rootElement.getName().equals( ROOT_ELEMENT_NAME );
+ } catch ( JDOMException jdomE ) {
+ LOGGER.severe( jdomE.getMessage() );
+ return false;
+ } catch ( IOException ioE ) {
+ LOGGER.severe( ioE.getMessage() );
+ return false;
+ }
+ }
+
+ /**
+  * Pre-parse hook that does no work in this parser.
+  *
+  * @param xmlFilePath path to the xml file; it is not inspected
+  * @return always true
+  */
+ public boolean preParseFile( final String xmlFilePath ) {
+ return true;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public boolean parseFile( final String xmlFilePath ) {
+ reset();
+ if ( xmlFilePath == null || xmlFilePath.isEmpty() ) {
+ return false;
+ }
+ final File xmlFile = new File( xmlFilePath );
+ if ( !xmlFile.canRead() ) {
+ return false;
+ }
+ final SAXBuilder saxBuilder = new SAXBuilder();
+ try {
+ final Document document = saxBuilder.build( xmlFile );
+ final Element rootElement = document.getRootElement();
+ final String documentText = getDocumentText( rootElement );
+ setDocumentText( documentText );
+ final Map<String, Element> eventMap = createElementIdMap( rootElement, EVENT );
+ final Map<String, Element> eventPropertiesMap = createElementIdMap( rootElement, EVENT_PROPERTIES );
+ final Map<String, Entity> entityMap = getEntityMap( rootElement, eventMap, eventPropertiesMap );
+ final List<Entity> entityList = XmiEntitySegregator.getNamedEntities( entityMap.values() );
+ final List<Entity> eventList = XmiEntitySegregator.getEvents( entityMap.values() );
+ final List<Entity> timexList = XmiEntitySegregator.getTimes( entityMap.values() );
+ final Map<String, String> relationArgMap = getRelationArgMap( rootElement );
+ final List<Relation> tlinkRelationList = getTLinks( rootElement, entityMap, relationArgMap );
+ final List<Collection<TextSpan>> coreferenceTextSpans = new ArrayList<>();
+ coreferenceTextSpans.addAll( getCorefLinks( rootElement, entityMap, relationArgMap ) );
+ coreferenceTextSpans.addAll( getCorefChains( rootElement, entityMap, relationArgMap ) );
+ Collections.sort( coreferenceTextSpans, CoreferenceChainSpanComparator.getInstance() );
+ List<CoreferenceChain> coreferenceChainList = Collections.emptyList();
+ if ( !entityList.isEmpty() || !eventList.isEmpty() || !timexList.isEmpty() ) {
+ coreferenceChainList = CoreferenceFactory.createCoreferenceChains( coreferenceTextSpans, entityList,
+ eventList, timexList );
+ }
+ int wordCount = -1;
+ if ( documentText != null && !documentText.isEmpty() ) {
+ wordCount = documentText.split( "\\s+" ).length;
+ }
+ final ImmutableAnnotationStore.AnnoteCollectBuilder builder
+ = new ImmutableAnnotationStore.AnnoteCollectBuilder();
+ builder.entities( entityList ).events( eventList ).times( timexList );
+// builder.relations( umlsRelationList )
+ builder.timeRelations( tlinkRelationList );
+// builder.coreferenceTextSpans( coreferenceChains );
+ builder.coreferenceChains( coreferenceChainList ).wordCount( wordCount );
+ if ( documentText != null && !documentText.isEmpty() ) {
+ builder.documentText( documentText );
+ }
+ _annotationStore = builder.build();
+ return true;
+ } catch ( JDOMException jdomE ) {
+ LOGGER.severe( jdomE.getMessage() );
+ return false;
+ } catch ( IOException ioE ) {
+ LOGGER.severe( ioE.getMessage() );
+ return false;
+ }
+ }
+
+
+ /**
+  * Fetches the document text stored in the "sofaString" attribute of the first "Sofa" child element.
+  *
+  * @param rootElement the root xml element in an xmi file
+  * @return the document text after the replacement below, or an empty String if the root has no
+  * "Sofa" child or that child has no "sofaString" attribute
+  */
+ static private String getDocumentText( final Element rootElement ) {
+    final Element sofaElement = getChild( rootElement, DOCUMENT_TEXT_NAME );
+    if ( sofaElement == null ) {
+       // No Sofa element: callers already treat empty text as "text unknown".
+       return "";
+    }
+    final String text = sofaElement.getAttributeValue( DOCUMENT_TEXT );
+    if ( text == null ) {
+       return "";
+    }
+    // NOTE(review): as shown here every space becomes a newline; the first literal may originally have
+    // been an escaped or special character that was flattened by the mail archive - confirm against svn.
+    return text.replace( " ", "\n" );
+ }
+
+ /**
+  * Gathers the entities of every mention type handled by this parser into a single map.
+  * Only event mentions use the two event maps; all other types are read without them.
+  *
+  * @param rootElement        the root xml element in an xmi file
+  * @param eventMap           map of element ids to Event elements
+  * @param eventPropertiesMap map of element ids to EventProperties elements
+  * @return map of element ids to the Entities created from the xmi mentions
+  */
+ private Map<String, Entity> getEntityMap( final Element rootElement,
+                                           final Map<String, Element> eventMap,
+                                           final Map<String, Element> eventPropertiesMap ) {
+    final Map<String, Entity> allEntities = new HashMap<>();
+    final String[] namedEntityTypes = { SIGN_SYMPTOM, PROCEDURE, DISEASE, MEDICATION, ANATOMIC_SITE };
+    for ( String typeName : namedEntityTypes ) {
+       allEntities.putAll( getEntities( rootElement, typeName, null, null ) );
+    }
+    allEntities.putAll( getEntities( rootElement, EVENT_MENTION, eventMap, eventPropertiesMap ) );
+    final String[] timeTypes = { TIME_MENTION, DATE_ANNOTATION };
+    for ( String typeName : timeTypes ) {
+       allEntities.putAll( getEntities( rootElement, typeName, null, null ) );
+    }
+    return allEntities;
+ }
+
+
+ /**
+  * Maps the id of each "RelationArgument" element to the value of its "argument" attribute.
+  *
+  * @param rootElement the root xml element in an xmi file
+  * @return map of relation argument ids to the ids held in their "argument" attributes
+  */
+ static private Map<String, String> getRelationArgMap( final Element rootElement ) {
+    final Map<String, String> argumentToEntityId = new HashMap<>();
+    for ( Element relationArgument : getChildren( rootElement, RELATION_ARG ) ) {
+       final String argumentId = getElementId( relationArgument );
+       argumentToEntityId.put( argumentId, relationArgument.getAttributeValue( ARGUMENT ) );
+    }
+    // TODO (left open by the original author without further detail)
+    return argumentToEntityId;
+ }
+
+
+ /**
+  * Indexes all children of the root that have the given element name by their id.
+  * Elements for which no usable id is found are left out of the map.
+  *
+  * @param rootElement      the root xml element in an xmi file
+  * @param elementClassName name of the child elements to index
+  * @return map of element ids to elements
+  */
+ static private Map<String, Element> createElementIdMap( final Element rootElement, final String elementClassName ) {
+    final List<Element> classElements = getChildren( rootElement, elementClassName );
+    final Map<String, Element> elementIdMap = new HashMap<>( classElements.size() );
+    for ( Element element : classElements ) {
+       final String elementId = getElementId( element );
+       // getElementId reports a missing id as an empty String, never as null,
+       // so the empty check is what keeps id-less elements out of the map.
+       if ( elementId != null && !elementId.isEmpty() ) {
+          elementIdMap.put( elementId, element );
+       }
+    }
+    return elementIdMap;
+ }
+
+
+ /**
+  * Creates an Entity for each child of the root that has the given element name.
+  * Elements with a bad text span, or whose spanned text is blank, are skipped.
+  * Event mentions also receive the attributes of their event properties, time and date mentions
+  * receive an "XMI_TIMEX" attribute, and all other types receive the attributes of their concepts.
+  *
+  * @param rootElement        the root xml element in an xmi file
+  * @param classElementName   name of the mention elements to read
+  * @param eventMap           map of element ids to Event elements; only used for event mentions
+  * @param eventPropertiesMap map of element ids to EventProperties elements; only used for event mentions
+  * @return map of element ids to the Entities created for them
+  */
+ private Map<String, Entity> getEntities( final Element rootElement, final String classElementName,
+                                          final Map<String, Element> eventMap,
+                                          final Map<String, Element> eventPropertiesMap ) {
+    final List<Element> mentionElements = getChildren( rootElement, classElementName );
+    final Map<String, Entity> idToEntity = new HashMap<>();
+    final String wholeText = getDocumentText();
+    for ( Element mention : mentionElements ) {
+       final String mentionId = getElementId( mention );
+       final TextSpan span = createEntityTextSpan( mention );
+       if ( span.equals( BAD_TEXT_SPAN ) ) {
+          continue;
+       }
+       final String coveredText = getSpannedText( span, wholeText );
+       if ( coveredText.trim().isEmpty() ) {
+          continue;
+       }
+       final ClassType classType = getEntityClassType( classElementName );
+       final Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> attributes
+             = createNlpAttributes( mention );
+       final boolean isEvent = classElementName.equals( EVENT_MENTION );
+       final boolean isTime = !isEvent
+                              && (classElementName.equals( TIME_MENTION )
+                                  || classElementName.equals( DATE_ANNOTATION ));
+       if ( isEvent ) {
+          final String eventId = mention.getAttributeValue( EVENT_ID_KEY );
+          attributes.addAll( parseEventAttributes( rootElement, eventId, eventMap, eventPropertiesMap ) );
+       } else if ( isTime ) {
+          attributes.add( new DefaultAttribute( "XMI_TIMEX", "DATE" ) );
+       } else {
+          // Named entities list the ids of their concepts separated by whitespace.
+          final String conceptIdArray = mention.getAttributeValue( CONCEPT_ARRAY );
+          if ( conceptIdArray != null ) {
+             for ( String conceptId : conceptIdArray.split( "\\s+" ) ) {
+                attributes.addAll( parseUmlsAttributes( rootElement, conceptId ) );
+             }
+          }
+       }
+       attributes.add( new DefaultAttribute( DefinedAttributeType.UNIQUE_ID, mentionId ) );
+       final org.chboston.cnlp.nlp.annotation.attribute.Attribute[] attributeArray
+             = attributes.toArray( new org.chboston.cnlp.nlp.annotation.attribute.Attribute[ attributes.size() ] );
+       idToEntity.put( mentionId, new DefaultEntity( span, coveredText, classType, attributeArray ) );
+    }
+    return idToEntity;
+ }
+
+
+ /**
+  * Converts every xml attribute of an element, except its id, into an annotation attribute.
+  *
+  * @param element xml element whose attributes should be converted
+  * @return new modifiable collection with one annotation attribute per non-id xml attribute
+  */
+ static private Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> createNlpAttributes(
+       final Element element ) {
+    final Collection<Attribute> xmlAttributes = getNonIdXmlAttributes( element );
+    final Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> converted
+          = new ArrayList<>( xmlAttributes.size() );
+    for ( Attribute xmlAttribute : xmlAttributes ) {
+       final String name = xmlAttribute.getName();
+       final String value = xmlAttribute.getValue();
+       converted.add( createNlpAttribute( name, value ) );
+    }
+    return converted;
+ }
+
+ /**
+  * Creates an annotation attribute for a name and value pair.
+  * A polarity attribute with the value "-1" is created with the value "NEG" instead.
+  *
+  * @param name  name of the attribute
+  * @param value value of the attribute
+  * @return annotation attribute for the given name and value
+  */
+ static private org.chboston.cnlp.nlp.annotation.attribute.Attribute createNlpAttribute( final String name,
+                                                                                         final String value ) {
+    final AttributeType type = AttributeTypeFactory.getAttributeForName( name );
+    final boolean isNegativePolarity = type == DefinedAttributeType.POLARITY && value.equals( "-1" );
+    if ( !isNegativePolarity ) {
+       return new DefaultAttribute( type.getName(), value );
+    }
+    return new DefaultAttribute( DefinedAttributeType.POLARITY, "NEG" );
+ }
+
+
+ /**
+  * Translates an xmi element name into the class type used for its entities.
+  *
+  * @param classTypeName name of an xmi mention element
+  * @return the matching temporal or semantic class type, or a new custom class type for unknown names
+  */
+ static private ClassType getEntityClassType( final String classTypeName ) {
+    // TODO - add more class types. See Knowtator xml parser for possible list
+    switch ( classTypeName ) {
+       case EVENT_MENTION:
+          return TemporalClassType.EVENT;
+       case TIME_MENTION:
+       case DATE_ANNOTATION:
+          // Time mentions and date annotations are both treated as timex.
+          return TemporalClassType.TIMEX;
+       case SIGN_SYMPTOM:
+          return SemanticClassType.SIGN_OR_SYMPTOM;
+       case DISEASE:
+          return SemanticClassType.DISEASE_DISORDER;
+       case ANATOMIC_SITE:
+          return SemanticClassType.ANATOMICAL_SITE;
+       case MEDICATION:
+          return SemanticClassType.MEDICATION;
+       case PROCEDURE:
+          return SemanticClassType.PROCEDURE;
+       case "generic_class":
+          return SemanticClassType.MISC;
+       default:
+          return new CustomClassType( classTypeName );
+    }
+ }
+
+
+ /**
+  * Follows an event id to its Event element and on to its EventProperties element,
+  * then converts the xml attributes of the properties element.
+  *
+  * @param rootElement        the root xml element in an xmi file; not used
+  * @param eventId            id of the Event element that belongs to an event mention
+  * @param eventMap           map of element ids to Event elements
+  * @param eventPropertiesMap map of element ids to EventProperties elements
+  * @return attributes of the event properties, or an empty list if any step of the lookup fails
+  */
+ static private Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> parseEventAttributes(
+       final Element rootElement, final String eventId,
+       final Map<String, Element> eventMap, final Map<String, Element> eventPropertiesMap ) {
+    final Element event = eventMap.get( eventId );
+    String propertiesId = null;
+    if ( event != null ) {
+       propertiesId = event.getAttributeValue( PROPERTIES_ID_KEY );
+    }
+    Element properties = null;
+    if ( propertiesId != null ) {
+       properties = eventPropertiesMap.get( propertiesId );
+    }
+    if ( properties != null ) {
+       return createNlpAttributes( properties );
+    }
+    return Collections.emptyList();
+ }
+
+ /**
+  * Finds the "UmlsConcept" element with the given id and converts its xml attributes.
+  *
+  * @param rootElement the root xml element in an xmi file
+  * @param conceptId   id of the concept element
+  * @return attributes of the concept, or an empty list if no concept has the given id
+  */
+ static private Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> parseUmlsAttributes(
+       final Element rootElement, final String conceptId ) {
+    final Element concept = getIdElement( rootElement, UMLS_CONCEPT, conceptId );
+    if ( concept != null ) {
+       return createNlpAttributes( concept );
+    }
+    return Collections.emptyList();
+ }
+
+
+ /**
+  * Scans the children of the root with the given element name for one with the given id.
+  *
+  * @param rootElement     the root xml element in an xmi file
+  * @param elementTypeName name of the child elements to search
+  * @param elementId       id to look for
+  * @return the first matching element, or null if there is none
+  */
+ static private Element getIdElement( final Element rootElement, final String elementTypeName,
+                                      final String elementId ) {
+    for ( Element candidate : getChildren( rootElement, elementTypeName ) ) {
+       final String candidateId = getElementId( candidate );
+       if ( candidateId.equals( elementId ) ) {
+          return candidate;
+       }
+    }
+    return null;
+ }
+
+ /**
+  * Reads the id of an element from its attributes.
+  *
+  * @param element xml element
+  * @return value of the first attribute named "id" whose value is not "0", or an empty String if none exists
+  */
+ static private String getElementId( final Element element ) {
+    final List<Attribute> attributes = element.getAttributes();
+    if ( attributes == null ) {
+       return "";
+    }
+    for ( Attribute attribute : attributes ) {
+       if ( !attribute.getName().equals( ID ) ) {
+          continue;
+       }
+       final String idValue = attribute.getValue();
+       if ( !idValue.equals( "0" ) ) {
+          return idValue;
+       }
+    }
+    return "";
+ }
+
+ /**
+  * Collects all xml attributes of an element except those named "id".
+  *
+  * @param element xml element
+  * @return new collection of the attributes whose name is not "id"; empty if the element has no attributes
+  */
+ static private Collection<Attribute> getNonIdXmlAttributes( final Element element ) {
+    final List<Attribute> xmiAttributes = element.getAttributes();
+    // Presize with size(), not size() - 1: an element without any attribute would otherwise ask for a
+    // capacity of -1, which ArrayList rejects with an IllegalArgumentException.
+    final Collection<Attribute> nonIdAttributes = new ArrayList<>( xmiAttributes.size() );
+    for ( Attribute xmiAttribute : xmiAttributes ) {
+       final String attributeName = xmiAttribute.getName();
+       if ( !attributeName.equals( ID ) ) {
+          nonIdAttributes.add( xmiAttribute );
+       }
+    }
+    return nonIdAttributes;
+ }
+
+ /**
+  * Creates a temporal Relation for each "TemporalTextRelation" element whose two arguments can be
+  * resolved, first to relation arguments and then to known entities. Unresolvable relations are
+  * logged and skipped.
+  *
+  * @param rootElement    xml root element
+  * @param entityMap      map of elementIDs and Entities
+  * @param relationArgMap map of argument elementIDs and entity elementIDs
+  * @return list of Relations created with all the given information
+  */
+ static private List<Relation> getTLinks( final Element rootElement,
+                                          final Map<String, Entity> entityMap,
+                                          final Map<String, String> relationArgMap ) {
+    if ( entityMap.isEmpty() ) {
+       return Collections.emptyList();
+    }
+    final List<Relation> tlinks = new ArrayList<>();
+    for ( Element tlinkElement : getChildren( rootElement, TLINK ) ) {
+       final String relationId = getElementId( tlinkElement );
+       // TODO make classtype tlink
+       final List<org.chboston.cnlp.nlp.annotation.attribute.Attribute> tlinkAttributes = new ArrayList<>();
+       String sourceArgId = "";
+       String targetArgId = "";
+       for ( Attribute xmlAttribute : getNonIdXmlAttributes( tlinkElement ) ) {
+          final String name = xmlAttribute.getName();
+          final String value = xmlAttribute.getValue();
+          if ( name.equalsIgnoreCase( LINK_ARG1 ) ) {
+             sourceArgId = value;
+             continue;
+          }
+          if ( name.equalsIgnoreCase( LINK_ARG2 ) ) {
+             targetArgId = value;
+             continue;
+          }
+          // The link category is stored as the relation type; everything else keeps its own name.
+          final String nlpName
+                = name.equalsIgnoreCase( LINK_TYPE ) ? DefinedAttributeType.RELATION_TYPE.getName() : name;
+          tlinkAttributes.add( createNlpAttribute( nlpName, value ) );
+       }
+       // The same message is used for a missing argument id and for an argument id that resolves to nothing.
+       final String missingArgumentMessage = "Relation " + relationId
+                                             + " has no Source " + sourceArgId
+                                             + " and/or no Target " + targetArgId;
+       if ( sourceArgId.isEmpty() || targetArgId.isEmpty() ) {
+          LOGGER.severe( missingArgumentMessage );
+          continue;
+       }
+       final String sourceEntityId = relationArgMap.get( sourceArgId );
+       final String targetEntityId = relationArgMap.get( targetArgId );
+       if ( sourceEntityId == null || targetEntityId == null ) {
+          LOGGER.severe( missingArgumentMessage );
+          continue;
+       }
+       final Entity source = entityMap.get( sourceEntityId );
+       final Entity target = entityMap.get( targetEntityId );
+       if ( source == null || target == null ) {
+          LOGGER.severe( "Relation " + relationId
+                         + " Source " + sourceEntityId
+                         + " and/or Target " + targetEntityId + " does not exist" );
+          continue;
+       }
+       tlinkAttributes.add( new DefaultAttribute( DefinedAttributeType.UNIQUE_ID, relationId ) );
+       final org.chboston.cnlp.nlp.annotation.attribute.Attribute[] attributeArray = tlinkAttributes
+             .toArray( new org.chboston.cnlp.nlp.annotation.attribute.Attribute[ tlinkAttributes.size() ] );
+       tlinks.add( new DefaultRelation( source, target, TemporalClassType.TLINK, attributeArray ) );
+    }
+    return tlinks;
+ }
+
+
+ /**
+  * Reads each "CoreferenceRelation" element and resolves its two arguments to entities.
+  * Relations whose arguments cannot be resolved are logged and skipped.
+  *
+  * @param rootElement    xml root element
+  * @param entityMap      map of elementIDs and Entities
+  * @param relationArgMap map of argument elementIDs and entity elementIDs
+  * @return list of TextSpan pairs for Coreference Relations created with all the given information
+  */
+ static private List<Collection<TextSpan>> getCorefLinks( final Element rootElement,
+                                                          final Map<String, Entity> entityMap,
+                                                          final Map<String, String> relationArgMap ) {
+    if ( entityMap.isEmpty() ) {
+       return Collections.emptyList();
+    }
+    final List<Collection<TextSpan>> spanPairs = new ArrayList<>();
+    for ( Element corefElement : getChildren( rootElement, COREF_RELATION ) ) {
+       final String relationId = getElementId( corefElement );
+       String sourceArgId = "";
+       String targetArgId = "";
+       for ( Attribute xmlAttribute : getNonIdXmlAttributes( corefElement ) ) {
+          final String name = xmlAttribute.getName();
+          final String value = xmlAttribute.getValue();
+          if ( name.equalsIgnoreCase( LINK_ARG1 ) ) {
+             sourceArgId = value;
+          } else if ( name.equalsIgnoreCase( LINK_ARG2 ) ) {
+             targetArgId = value;
+          }
+       }
+       // The same message is used for a missing argument id and for an argument id that resolves to nothing.
+       final String missingArgumentMessage = "Relation " + relationId
+                                             + " has no Source " + sourceArgId
+                                             + " and/or no Target " + targetArgId;
+       if ( sourceArgId.isEmpty() || targetArgId.isEmpty() ) {
+          LOGGER.severe( missingArgumentMessage );
+          continue;
+       }
+       final String sourceEntityId = relationArgMap.get( sourceArgId );
+       final String targetEntityId = relationArgMap.get( targetArgId );
+       if ( sourceEntityId == null || targetEntityId == null ) {
+          LOGGER.severe( missingArgumentMessage );
+          continue;
+       }
+       final Entity source = entityMap.get( sourceEntityId );
+       final Entity target = entityMap.get( targetEntityId );
+       if ( source == null || target == null ) {
+          LOGGER.severe( "Relation " + relationId
+                         + " Source " + sourceEntityId
+                         + " and/or Target " + targetEntityId + " does not exist" );
+          continue;
+       }
+       final Collection<TextSpan> pair = new ArrayList<>( 2 );
+       pair.add( source.getTextSpan() );
+       pair.add( target.getTextSpan() );
+       spanPairs.add( pair );
+    }
+    return Collections.unmodifiableList( spanPairs );
+ }
+
+
+ /**
+  * Reads each "CollectionTextRelation" element and resolves the whitespace-separated ids in its
+  * "members" attribute to entities. A chain is kept only if more than one member resolves to an
+  * entity; unresolvable members are logged and left out.
+  *
+  * @param rootElement    xml root element
+  * @param entityMap      map of elementIDs and Entities
+  * @param relationArgMap map of argument elementIDs and entity elementIDs
+  * @return list of TextSpan collections, one per Coreference chain with at least two resolved members
+  */
+ static private List<Collection<TextSpan>> getCorefChains( final Element rootElement,
+                                                           final Map<String, Entity> entityMap,
+                                                           final Map<String, String> relationArgMap ) {
+    if ( entityMap.isEmpty() ) {
+       return Collections.emptyList();
+    }
+    final Pattern whitespace = Pattern.compile( "\\s+" );
+    final List<Collection<TextSpan>> chains = new ArrayList<>();
+    for ( Element chainElement : getChildren( rootElement, COREF_CHAIN ) ) {
+       final String relationId = getElementId( chainElement );
+       // Only the first attribute named "members" is used.
+       String[] memberIds = null;
+       for ( Attribute xmlAttribute : getNonIdXmlAttributes( chainElement ) ) {
+          final String name = xmlAttribute.getName();
+          final String value = xmlAttribute.getValue();
+          if ( name.equalsIgnoreCase( COREF_MEMBERS ) ) {
+             memberIds = whitespace.split( value );
+             break;
+          }
+       }
+       if ( memberIds == null || memberIds.length == 0 ) {
+          LOGGER.severe( "Relation " + relationId + " has no Members" );
+          continue;
+       }
+       if ( memberIds.length == 1 ) {
+          LOGGER.severe( "Relation " + relationId + " has only one Member " + memberIds[ 0 ] );
+          continue;
+       }
+       // First pass: relation argument ids to entity ids.
+       final Collection<String> memberEntityIds = new ArrayList<>();
+       for ( String memberId : memberIds ) {
+          final String entityId = relationArgMap.get( memberId );
+          if ( entityId != null ) {
+             memberEntityIds.add( entityId );
+          } else {
+             LOGGER.severe( "Relation " + relationId
+                            + " has no Source " + memberId );
+          }
+       }
+       // Second pass: entity ids to the text spans of their entities.
+       final Collection<TextSpan> chainSpans = new ArrayList<>( 2 );
+       for ( String entityId : memberEntityIds ) {
+          final Entity member = entityMap.get( entityId );
+          if ( member != null ) {
+             chainSpans.add( member.getTextSpan() );
+          } else {
+             LOGGER.severe( "Relation " + relationId + " Entity ID " + entityId + " does not exist" );
+          }
+       }
+       if ( chainSpans.size() > 1 ) {
+          chains.add( chainSpans );
+       }
+    }
+    return Collections.unmodifiableList( chains );
+ }
+
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ protected TextSpan createEntityTextSpan( final Element spanElement ) {
+ int begin = 0;
+ int end = 0;
+ try {
+ begin = spanElement.getAttribute( BEGIN ).getIntValue();
+ end = spanElement.getAttribute( END ).getIntValue();
+ } catch ( JDOMException jdomE ) {
+ LOGGER.severe( jdomE.getMessage() );
+ return BAD_TEXT_SPAN;
+ }
+ return new DefaultTextSpan( begin, end );
+ }
+
+ /**
+  * Gets the text covered by a span.
+  * If the document text is known and the span lies within it then this returns the matching substring,
+  * otherwise a placeholder String filled with the character 'A'.
+  *
+  * @param textSpan     span of the wanted text
+  * @param documentText full document text; may be null or empty
+  * @return The spanned text within provided document text, or a String filled with character 'A'
+  */
+ static private String getSpannedText( final TextSpan textSpan, final String documentText ) {
+    if ( documentText == null || documentText.isEmpty() ) {
+       return fakeSomeText( textSpan );
+    }
+    final int startIndex = textSpan.getStartIndex();
+    final int endIndex = textSpan.getEndIndex();
+    // substring treats the end index as exclusive, so a span may end exactly at the text length.
+    // The start <= end check keeps an inverted span from raising StringIndexOutOfBoundsException.
+    if ( startIndex >= 0 && startIndex <= endIndex && endIndex <= documentText.length() ) {
+       return documentText.substring( startIndex, endIndex );
+    }
+    return fakeSomeText( textSpan );
+ }
+
+ /**
+  * Produces placeholder text for a span whose real text is not available.
+  * This will knock some of the IAA capabilities, such as Alpha computations based upon word count,
+  * marked comparison by word count, etc.
+  * A discontiguous span is first replaced by one contiguous span from its start index to its end index.
+  *
+  * @param textSpan -
+  * @return A String the length of the textSpan filled with the character 'A'
+  */
+ static private String fakeSomeText( final TextSpan textSpan ) {
+    TextSpan sizedSpan = textSpan;
+    if ( textSpan instanceof DiscontiguousTextSpan ) {
+       sizedSpan = new DefaultTextSpan( textSpan.getStartIndex(), textSpan.getEndIndex() );
+    }
+    final char[] filler = new char[ sizedSpan.getLength() ];
+    Arrays.fill( filler, 'A' );
+    return new String( filler );
+ }
+
+
+ /**
+  * Collects the direct children of an element that have the given element name.
+  *
+  * @param rootElement parent element
+  * @param name        element name to match
+  * @return new list of the matching child elements, in document order; empty if there are none
+  */
+ static private List<Element> getChildren( final Element rootElement, final String name ) {
+    final List<?> allChildren = rootElement.getChildren();
+    final List<Element> matches = new ArrayList<>( allChildren.size() );
+    for ( Object candidate : allChildren ) {
+       if ( !(candidate instanceof Element) ) {
+          continue;
+       }
+       final Element childElement = (Element)candidate;
+       if ( childElement.getName().equals( name ) ) {
+          matches.add( childElement );
+       }
+    }
+    return matches;
+ }
+
+ /**
+  * Gets the first direct child of an element that has the given element name.
+  *
+  * @param rootElement parent element
+  * @param name        element name to match
+  * @return the first matching child element, or null if there is none
+  */
+ static private Element getChild( final Element rootElement, final String name ) {
+    final List<Element> matches = getChildren( rootElement, name );
+    return matches.isEmpty() ? null : matches.get( 0 );
+ }
+
+
+ /**
+  * Debugging aid: prints the name of every child element of the root, followed by its attributes,
+  * to standard out, then reads the document text. Parse and read failures are logged.
+  *
+  * @param filePath path to an xmi file
+  */
+ static private void testParse( final String filePath ) {
+    final SAXBuilder saxBuilder = new SAXBuilder();
+    try {
+       final Element rootElement = saxBuilder.build( filePath ).getRootElement();
+       for ( Object child : rootElement.getChildren() ) {
+          if ( !(child instanceof Element) ) {
+             LOGGER.warning( " NOT ELEMENT " + child.toString() );
+             continue;
+          }
+          final Element element = (Element)child;
+          System.out.println( element.getName() );
+          for ( Object thing : element.getAttributes() ) {
+             if ( thing instanceof Attribute ) {
+                final Attribute attribute = (Attribute)thing;
+                System.out.println( "\t" + attribute.getName() + " = " + attribute.getValue() );
+             }
+          }
+       }
+       getDocumentText( rootElement );
+    } catch ( JDOMException | IOException multE ) {
+       LOGGER.severe( multE.getMessage() );
+    }
+ }
+
+
+ /**
+  * Developer entry point: runs testParse on the first file in the hard-coded input directory
+  * whose name ends with "_report_4.txt.xmi.old".
+  *
+  * @param args not used
+  */
+ public static void main( String[] args ) {
+    final File inputDir = new File( INPUT_DIR_PATH );
+    final File[] files = inputDir.listFiles();
+    if ( files == null ) {
+       LOGGER.warning( "No files in " + inputDir.getPath() );
+       System.exit( 0 );
+    }
+    for ( File file : files ) {
+       final boolean isWanted = file.getName().endsWith( "_report_4.txt.xmi.old" );
+       if ( isWanted ) {
+          // Only the first matching file is parsed.
+          testParse( file.getPath() );
+          break;
+       }
+    }
+ }
+
+}
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/XmiEntitySegregator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/XmiEntitySegregator.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/XmiEntitySegregator.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/XmiEntitySegregator.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,74 @@
+package org.chboston.cnlp.xmi.parser;
+
+import org.chboston.cnlp.nlp.annotation.annotation.AnnotationSpanComparator;
+import org.chboston.cnlp.nlp.annotation.classtype.ClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.TemporalClassType;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.textspan.TextSpan;
+
+import java.util.*;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/28/13
+ */
+final public class XmiEntitySegregator {
+
+ private XmiEntitySegregator() {
+ }
+
+ /**
+ * @param entities collection of entities
+ * @return all named entities with the given collection of entities
+ */
+ static public List<Entity> getNamedEntities( final Iterable<Entity> entities ) {
+ final List<Entity> namedEntityList = new ArrayList<>();
+ for ( Entity entity : entities ) {
+ final ClassType type = entity.getClassType();
+ if ( type != TemporalClassType.EVENT && type != TemporalClassType.TIMEX ) {
+ namedEntityList.add( entity );
+ }
+ }
+ Collections.sort( namedEntityList, AnnotationSpanComparator.getInstance() );
+ return Collections.unmodifiableList( namedEntityList );
+ }
+
+ /**
+ * @param entities collection of entities
+ * @return all events with the given collection of entities
+ */
+ static public List<Entity> getEvents( final Iterable<Entity> entities ) {
+ final List<Entity> eventList = new ArrayList<>();
+ for ( Entity entity : entities ) {
+ final ClassType type = entity.getClassType();
+ if ( type == TemporalClassType.EVENT ) {
+ eventList.add( entity );
+ }
+ }
+ Collections.sort( eventList, AnnotationSpanComparator.getInstance() );
+ return Collections.unmodifiableList( eventList );
+ }
+
+ /**
+ * @param entities collection of entities
+ * @return all timex3 times with the given collection of entities
+ */
+ static public List<Entity> getTimes( final Iterable<Entity> entities ) {
+ // XMI has Date, Time, and Timex3 annotations. Many will overlap. We only want one per textSpan.
+ final Map<TextSpan, Entity> textSpanTimes = new HashMap<>();
+ for ( Entity entity : entities ) {
+ final ClassType type = entity.getClassType();
+ if ( type == TemporalClassType.TIMEX ) {
+ textSpanTimes.put( entity.getTextSpan(), entity );
+ }
+ }
+ final List<Entity> timexList = new ArrayList<>();
+ for ( Entity entity : textSpanTimes.values() ) {
+ timexList.add( entity );
+ }
+ Collections.sort( timexList, AnnotationSpanComparator.getInstance() );
+ return Collections.unmodifiableList( timexList );
+ }
+
+}