You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2015/07/10 04:33:50 UTC
svn commit: r1690191 - in
/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty:
cell/DefaultUmlsItemCell.java cell/EventCell.java cell/TimexCell.java
plaintext/PrettyTextWriter.java
Author: seanfinan
Date: Fri Jul 10 02:33:50 2015
New Revision: 1690191
URL: http://svn.apache.org/r1690191
Log:
CTAKES-366 Upgraded PrettyTextWriter to mark Events that aren't already Semantically typed, mark Temporal Expressions, and list temporal relations
Added:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java?rev=1690191&r1=1690190&r2=1690191&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java Fri Jul 10 02:33:50 2015
@@ -120,4 +120,19 @@ public final class DefaultUmlsItemCell e
return "";
}
+ /**
+ * {@inheritDoc} Equal when other is a DefaultUmlsItemCell with an equal text span, the same negation and equal semantic text lines.
+ */
+ @Override
+ public boolean equals( final Object other ) {
+ return other instanceof DefaultUmlsItemCell
+ && getTextSpan().equals( ((DefaultUmlsItemCell)other).getTextSpan() )
+ && isNegated() == ((DefaultUmlsItemCell)other).isNegated()
+ && _semanticTextLines.equals( ((DefaultUmlsItemCell)other)._semanticTextLines );
+ }
+
+ public int hashCode() { // combines the same three properties that equals compares: text span, semantic text lines, negation
+ return 2 * getTextSpan().hashCode() + 2 * _semanticTextLines.hashCode() + (isNegated() ? 1 : 0);
+ }
+
}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java?rev=1690191&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java Fri Jul 10 02:33:50 2015
@@ -0,0 +1,98 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+
+import static org.apache.ctakes.core.cc.pretty.cell.DefaultUmlsItemCell.ENTITY_FILL;
+
+/** Item cell labeled "Event", with an additional "Negated" line when constructed with a negative polarity.
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 7/9/2015
+ */
+public class EventCell extends AbstractItemCell {
+
+ static private final int EVENT_SPAN = 5;
+ static private final String EVENT_TEXT = "Event";
+ static private final int NEGATED_SPAN = 8;
+ static private final String NEGATED_TEXT = "Negated";
+
+ final private boolean _negated;
+
+ public EventCell( final TextSpan textSpan, final int polarity ) {
+ super( textSpan );
+ _negated = polarity < 0;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return the maximum of the text span width and the label width: 8 if negated, otherwise the length of "Event" (5)
+ */
+ @Override
+ public int getWidth() {
+ return Math.max( getTextSpan().getWidth(), isNegated() ? NEGATED_SPAN : EVENT_SPAN );
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return 1 for the text span representation line + 1 for the "Event" label + 1 if negated
+ */
+ @Override
+ public int getHeight() {
+ return 2 + (isNegated() ? 1 : 0);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return {@link UmlsItemCell#ENTITY_FILL}
+ */
+ @Override
+ public String getText() {
+ return ENTITY_FILL;
+ }
+
+ /**
+ * @return true if the polarity given at construction was negative
+ */
+// @Override
+ public boolean isNegated() {
+ return _negated;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return {@link UmlsItemCell#ENTITY_FILL} for index 0, "Event" for index 1, "Negated" for index 2 if negated, otherwise an empty string
+ */
+ @Override
+ public String getLineText( final int lineIndex ) {
+ switch ( lineIndex ) {
+ case 0:
+ return ENTITY_FILL;
+ case 1:
+ return EVENT_TEXT;
+ case 2:
+ if ( isNegated() ) {
+ return NEGATED_TEXT;
+ }
+ }
+ return "";
+ }
+
+ /**
+ * {@inheritDoc} Equal when other is an EventCell with an equal text span and the same negation.
+ */
+ @Override
+ public boolean equals( final Object other ) {
+ return other instanceof EventCell
+ && getTextSpan().equals( ((EventCell)other).getTextSpan() )
+ && isNegated() == ((EventCell)other).isNegated();
+ }
+
+ public int hashCode() {
+ return 2 * getTextSpan().hashCode() + (isNegated() ? 1 : 0);
+ }
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java?rev=1690191&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java Fri Jul 10 02:33:50 2015
@@ -0,0 +1,83 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+
+import static org.apache.ctakes.core.cc.pretty.cell.DefaultUmlsItemCell.ENTITY_FILL;
+
+/** Item cell that labels a text span as "Timex".
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 7/9/2015
+ */
+public class TimexCell extends AbstractItemCell {
+
+ // Return Code used to indicate that a full entity span should be filled with an indicator character, e.g. '-'
+// static public final String TIMEX_FILL = "TIMEX_FILL";
+
+ static private final int TIMEX_SPAN = 5;
+ static private final String TIMEX_TEXT = "Timex";
+
+ public TimexCell( final TextSpan textSpan ) {
+ super( textSpan );
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return the maximum of the text span width and the length of "Timex" (5)
+ */
+ @Override
+ public int getWidth() {
+ return Math.max( getTextSpan().getWidth(), TIMEX_SPAN );
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return 1 for the text span representation line + 1 for the "Timex" label
+ */
+ @Override
+ public int getHeight() {
+ return 2;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return {@link org.apache.ctakes.core.cc.pretty.cell.UmlsItemCell#ENTITY_FILL}
+ */
+ @Override
+ public String getText() {
+ return ENTITY_FILL;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return {@link org.apache.ctakes.core.cc.pretty.cell.UmlsItemCell#ENTITY_FILL} for index 0, "Timex" for index 1, otherwise an empty string
+ */
+ @Override
+ public String getLineText( final int lineIndex ) {
+ switch ( lineIndex ) {
+ case 0:
+ return ENTITY_FILL;
+ case 1:
+ return TIMEX_TEXT;
+ }
+ return "";
+ }
+
+ /**
+ * {@inheritDoc} Equal when other is a TimexCell with an equal text span.
+ */
+ @Override
+ public boolean equals( final Object other ) {
+ return other instanceof TimexCell
+ && getTextSpan().equals( ((TimexCell)other).getTextSpan() );
+ }
+
+ public int hashCode() {
+ return 3 * getTextSpan().hashCode();
+ }
+
+}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java?rev=1690191&r1=1690190&r2=1690191&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java Fri Jul 10 02:33:50 2015
@@ -1,9 +1,7 @@
package org.apache.ctakes.core.cc.pretty.plaintext;
import org.apache.ctakes.core.cc.pretty.SemanticGroup;
-import org.apache.ctakes.core.cc.pretty.cell.DefaultBaseItemCell;
-import org.apache.ctakes.core.cc.pretty.cell.DefaultUmlsItemCell;
-import org.apache.ctakes.core.cc.pretty.cell.ItemCell;
+import org.apache.ctakes.core.cc.pretty.cell.*;
import org.apache.ctakes.core.cc.pretty.row.DefaultItemRow;
import org.apache.ctakes.core.cc.pretty.row.ItemRow;
import org.apache.ctakes.core.cc.pretty.textspan.DefaultTextSpan;
@@ -11,15 +9,19 @@ import org.apache.ctakes.core.cc.pretty.
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.core.util.IdentifiedAnnotationUtil;
import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
import java.io.BufferedWriter;
import java.io.File;
@@ -119,16 +121,13 @@ final public class PrettyTextWriter {
}
itemRows.add( baseItemRow );
itemRows.addAll( createItemRows( coveringItemMap ) );
- // Create list of all text span offsets
- final Collection<Integer> offsets = new HashSet<>();
- for ( TextSpan textSpan : baseItemMap.keySet() ) {
- offsets.add( textSpan.getBegin() );
- offsets.add( textSpan.getEnd() );
- }
// Create map of all text span offsets to adjusted offsets
- final Map<Integer, Integer> offsetAdjustedMap = createOffsetAdjustedMap( offsets, itemRows );
+ final Map<Integer, Integer> offsetAdjustedMap = createOffsetAdjustedMap( itemRows );
// print all of the item rows
printItemRows( offsetAdjustedMap, itemRows, writer );
+
+ printTLinks( jcas, sentence, writer );
+ writer.newLine();
}
/**
@@ -159,33 +158,56 @@ final public class PrettyTextWriter {
return baseItemMap;
}
+
/**
- * @param jcas ye olde ...
- * @param sentence annotation containing the sentence
+ *
+ * @param jcas ye olde ...
+ * @param sentence annotation containing the sentence
* @param baseItemMap map of text spans and item cells that represent those spans
- * @return map of covering annotations (item cells that cover more than one base cell)
+ * @return map of number of spanned base items and collections of item cells spanning that number of base item cells
*/
static private Map<Integer, Collection<ItemCell>> createCoveringItemMap( final JCas jcas,
final AnnotationFS sentence,
final Map<TextSpan, ItemCell> baseItemMap ) {
+ final Collection<TextSpan> usedTextSpans = new HashSet<>();
+ final Collection<ItemCell> requiredCells = new HashSet<>();
+ final Collection<ItemCell> eventCells = new HashSet<>();
+
final int sentenceBegin = sentence.getBegin();
final Collection<IdentifiedAnnotation> identifiedAnnotations
= JCasUtil.selectCovered( jcas, IdentifiedAnnotation.class, sentence );
- final Map<Integer, Collection<ItemCell>> coveringAnnotationMap = new HashMap<>();
- for ( IdentifiedAnnotation identifiedAnnotation : identifiedAnnotations ) {
- final Map<String, Collection<String>> semanticCuis = getSemanticCuis( identifiedAnnotation );
- if ( semanticCuis.isEmpty() ) {
+ for ( IdentifiedAnnotation annotation : identifiedAnnotations ) {
+ final TextSpan textSpan = new DefaultTextSpan( annotation, sentenceBegin );
+ if ( textSpan.getWidth() == 0 ) {
continue;
}
- final TextSpan textSpan = new DefaultTextSpan( identifiedAnnotation, sentenceBegin );
- if ( textSpan.getWidth() == 0 ) {
+ final Map<String, Collection<String>> semanticCuis = getSemanticCuis( annotation );
+ if ( !semanticCuis.isEmpty() ) {
+ final ItemCell itemCell = new DefaultUmlsItemCell( textSpan, annotation.getPolarity(), semanticCuis );
+ requiredCells.add( itemCell );
+ usedTextSpans.add( textSpan );
+ } else if ( annotation instanceof TimeMention ) {
+ requiredCells.add( new TimexCell( textSpan ) );
+ } else if ( annotation instanceof EventMention ) {
+ eventCells.add( new EventCell( textSpan, annotation.getPolarity() ) );
+ }
+ }
+ final Map<Integer, Collection<ItemCell>> coveringAnnotationMap = new HashMap<>();
+ for ( ItemCell itemCell : requiredCells ) {
+ final Collection<ItemCell> coveredBaseItems = getCoveredBaseItems( itemCell.getTextSpan(), baseItemMap );
+ Collection<ItemCell> coveringAnnotations = coveringAnnotationMap.get( coveredBaseItems.size() );
+ if ( coveringAnnotations == null ) {
+ coveringAnnotations = new HashSet<>();
+ coveringAnnotationMap.put( coveredBaseItems.size(), coveringAnnotations );
+ }
+ coveringAnnotations.add( itemCell );
+ }
+ for ( ItemCell itemCell : eventCells ) {
+ if ( usedTextSpans.contains( itemCell.getTextSpan() ) ) {
continue;
}
- final ItemCell itemCell = new DefaultUmlsItemCell( textSpan, identifiedAnnotation
- .getPolarity(), semanticCuis );
- final Collection<ItemCell> coveredBaseItems = getCoveredBaseItems( textSpan, baseItemMap );
- Collection<ItemCell> coveringAnnotations
- = coveringAnnotationMap.get( coveredBaseItems.size() );
+ final Collection<ItemCell> coveredBaseItems = getCoveredBaseItems( itemCell.getTextSpan(), baseItemMap );
+ Collection<ItemCell> coveringAnnotations = coveringAnnotationMap.get( coveredBaseItems.size() );
if ( coveringAnnotations == null ) {
coveringAnnotations = new HashSet<>();
coveringAnnotationMap.put( coveredBaseItems.size(), coveringAnnotations );
@@ -195,13 +217,21 @@ final public class PrettyTextWriter {
return coveringAnnotationMap;
}
+
/**
- * @param offsets original document offsets
* @param itemRows item rows
* @return map of original document offsets to adjusted printable offsets
*/
- static private Map<Integer, Integer> createOffsetAdjustedMap( final Collection<Integer> offsets,
- final Iterable<ItemRow> itemRows ) {
+ static private Map<Integer, Integer> createOffsetAdjustedMap( final Iterable<ItemRow> itemRows ) {
+ // Create list of all text span offsets. Had to be here because BaseTokens did not contain all offsets.
+ final Collection<Integer> offsets = new HashSet<>();
+ for ( ItemRow itemRow : itemRows ) {
+ final Collection<ItemCell> rowItemCells = itemRow.getItemCells();
+ for ( ItemCell itemCell : rowItemCells ) {
+ offsets.add( itemCell.getTextSpan().getBegin() );
+ offsets.add( itemCell.getTextSpan().getEnd() );
+ }
+ }
// Create map of all text span offsets to adjusted offsets
final List<Integer> offsetList = new ArrayList<>( offsets );
Collections.sort( offsetList );
@@ -251,16 +281,54 @@ final public class PrettyTextWriter {
final String lineText = itemRow.getTextLine( i, rowWidth, offsetAdjustedMap );
if ( !lineText.isEmpty() ) {
if ( firstLine ) {
- writer.write( "TEXT: " + lineText );
+ writer.write( "SENTENCE: " + lineText );
firstLine = false;
} else {
- writer.write( " " + lineText );
+ writer.write( " " + lineText );
}
writer.newLine();
}
}
}
+ }
+
+
+ /**
+ * Print TLinks whose first argument overlaps the sentence as "arg1 relationType arg2", separated by " , "
+ * @param jcas ye olde ...
+ * @param sentence annotation containing the sentence
+ * @param writer writer to which pretty text for the sentence should be written
+ * @throws IOException if the writer has issues
+ */
+ static private void printTLinks( final JCas jcas,
+ final AnnotationFS sentence,
+ final BufferedWriter writer ) throws IOException {
+ final Collection<TemporalTextRelation> tlinks = JCasUtil.select( jcas, TemporalTextRelation.class );
+ if ( tlinks == null || tlinks.isEmpty() ) {
+ return;
+ }
+ final Collection<TemporalTextRelation> sentenceTlinks = new ArrayList<>();
+ final TextSpan sentenceTextSpan = new DefaultTextSpan( sentence.getBegin(), sentence.getEnd() );
+ for ( TemporalTextRelation tlink : tlinks ) {
+ final Annotation argument1 = tlink.getArg1().getArgument();
+ final TextSpan argument1Span = new DefaultTextSpan( argument1, 0 );
+ if ( sentenceTextSpan.overlaps( argument1Span ) ) {
+ sentenceTlinks.add( tlink );
+ }
+ }
+ if ( sentenceTlinks.isEmpty() ) {
+ return;
+ }
+ final StringBuilder sb = new StringBuilder();
+ sb.append( "TLINKS: " );
+ for ( TemporalTextRelation tlink : sentenceTlinks ) {
+ sb.append( tlink.getArg1().getArgument().getCoveredText() ).append( " " );
+ sb.append( tlink.getCategory() ).append( " " );
+ sb.append( tlink.getArg2().getArgument().getCoveredText() ).append( " , " );
+ }
+ sb.setLength( sb.length() - 3 ); // drop the trailing " , " left by the last appended tlink
+ writer.write( sb.toString() );
writer.newLine();
}