You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2015/07/10 04:33:50 UTC

svn commit: r1690191 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty: cell/DefaultUmlsItemCell.java cell/EventCell.java cell/TimexCell.java plaintext/PrettyTextWriter.java

Author: seanfinan
Date: Fri Jul 10 02:33:50 2015
New Revision: 1690191

URL: http://svn.apache.org/r1690191
Log:
CTAKES-366  Upgraded PrettyTextWriter to mark Events that aren't already Semantically typed, mark Temporal Expressions, and list temporal relations

Added:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java
Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java?rev=1690191&r1=1690190&r2=1690191&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java Fri Jul 10 02:33:50 2015
@@ -120,4 +120,19 @@ public final class DefaultUmlsItemCell e
       return "";
    }
 
+   /**
+    * {@inheritDoc} Equal only to another DefaultUmlsItemCell with equal text span, negation flag and semantic text lines.
+    */
+   @Override
+   public boolean equals( final Object other ) {
+      return other instanceof DefaultUmlsItemCell
+             && getTextSpan().equals( ((DefaultUmlsItemCell)other).getTextSpan() )
+             && isNegated() == ((DefaultUmlsItemCell)other).isNegated()
+             && _semanticTextLines.equals( ((DefaultUmlsItemCell)other)._semanticTextLines );
+   }
+
+   public int hashCode() { // NOTE(review): overrides Object.hashCode() without @Override; combines text span, semantic text lines and negation
+      return 2 * getTextSpan().hashCode() + 2 * _semanticTextLines.hashCode() + (isNegated() ? 1 : 0);
+   }
+
 }

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java?rev=1690191&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/EventCell.java Fri Jul 10 02:33:50 2015
@@ -0,0 +1,98 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+
+import static org.apache.ctakes.core.cc.pretty.cell.DefaultUmlsItemCell.ENTITY_FILL;
+
+/** Item cell that labels its text span "Event" and adds a "Negated" line when built with a negative polarity.
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 7/9/2015
+ */
+public class EventCell extends AbstractItemCell {
+
+   static private final int EVENT_SPAN = 5; // character length of EVENT_TEXT
+   static private final String EVENT_TEXT = "Event";
+   static private final int NEGATED_SPAN = 8; // NOTE(review): "Negated" is 7 characters; confirm the extra column is intended
+   static private final String NEGATED_TEXT = "Negated";
+
+   final private boolean _negated;
+
+   public EventCell( final TextSpan textSpan, final int polarity ) {
+      super( textSpan );
+      _negated = polarity < 0; // any negative polarity marks the event as negated
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the larger of the text span width and the label width: NEGATED_SPAN (8) if negated, otherwise EVENT_SPAN (5)
+    */
+   @Override
+   public int getWidth() {
+      return Math.max( getTextSpan().getWidth(), isNegated() ? NEGATED_SPAN : EVENT_SPAN );
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return 2 (one line for the text span representation and one for the "Event" label), plus 1 if negated
+    */
+   @Override
+   public int getHeight() {
+      return 2 + (isNegated() ? 1 : 0);
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the ENTITY_FILL code statically imported from {@link DefaultUmlsItemCell}
+    */
+   @Override
+   public String getText() {
+      return ENTITY_FILL;
+   }
+
+   /**
+    * {@inheritDoc} True exactly when the constructor was given a negative polarity.
+    */
+//   @Override
+   public boolean isNegated() {
+      return _negated;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return ENTITY_FILL for index 0, "Event" for index 1, "Negated" for index 2 if negated, otherwise an empty string
+    */
+   @Override
+   public String getLineText( final int lineIndex ) {
+      switch ( lineIndex ) {
+         case 0:
+            return ENTITY_FILL;
+         case 1:
+            return EVENT_TEXT;
+         case 2: // when not negated this case leaves the switch and reaches the empty-string return below
+            if ( isNegated() ) {
+               return NEGATED_TEXT;
+            }
+      }
+      return "";
+   }
+
+   /**
+    * {@inheritDoc} Equal only to another EventCell with an equal text span and the same negation flag.
+    */
+   @Override
+   public boolean equals( final Object other ) {
+      return other instanceof EventCell
+             && getTextSpan().equals( ((EventCell)other).getTextSpan() )
+             && isNegated() == ((EventCell)other).isNegated();
+   }
+
+   public int hashCode() { // NOTE(review): overrides Object.hashCode() without @Override; uses the same fields as equals()
+      return 2 * getTextSpan().hashCode() + (isNegated() ? 1 : 0);
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java?rev=1690191&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/TimexCell.java Fri Jul 10 02:33:50 2015
@@ -0,0 +1,83 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+
+import static org.apache.ctakes.core.cc.pretty.cell.DefaultUmlsItemCell.ENTITY_FILL;
+
+/** Item cell that labels its text span "Timex"; it has no negation line.
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 7/9/2015
+ */
+public class TimexCell extends AbstractItemCell {
+
+   // Disabled: return code used to indicate that a full entity span should be filled with an indicator character, e.g. '-'
+//   static public final String TIMEX_FILL = "TIMEX_FILL";
+
+   static private final int TIMEX_SPAN = 5; // character length of TIMEX_TEXT
+   static private final String TIMEX_TEXT = "Timex";
+
+   public TimexCell( final TextSpan textSpan ) {
+      super( textSpan );
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the larger of the text span width and TIMEX_SPAN, the length of "Timex" (5)
+    */
+   @Override
+   public int getWidth() {
+      return Math.max( getTextSpan().getWidth(), TIMEX_SPAN );
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return 2: one line for the text span representation and one for the "Timex" label
+    */
+   @Override
+   public int getHeight() {
+      return 2;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the ENTITY_FILL code statically imported from {@link org.apache.ctakes.core.cc.pretty.cell.DefaultUmlsItemCell}
+    */
+   @Override
+   public String getText() {
+      return ENTITY_FILL;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return ENTITY_FILL for index 0, "Timex" for index 1, and an empty string for any other index
+    */
+   @Override
+   public String getLineText( final int lineIndex ) {
+      switch ( lineIndex ) {
+         case 0:
+            return ENTITY_FILL;
+         case 1:
+            return TIMEX_TEXT;
+      }
+      return "";
+   }
+
+   /**
+    * {@inheritDoc} Equal only to another TimexCell with an equal text span.
+    */
+   @Override
+   public boolean equals( final Object other ) {
+      return other instanceof TimexCell
+             && getTextSpan().equals( ((TimexCell)other).getTextSpan() );
+   }
+
+   public int hashCode() { // NOTE(review): overrides Object.hashCode() without @Override; derived from the text span only, like equals()
+      return 3 * getTextSpan().hashCode();
+   }
+
+}

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java?rev=1690191&r1=1690190&r2=1690191&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java Fri Jul 10 02:33:50 2015
@@ -1,9 +1,7 @@
 package org.apache.ctakes.core.cc.pretty.plaintext;
 
 import org.apache.ctakes.core.cc.pretty.SemanticGroup;
-import org.apache.ctakes.core.cc.pretty.cell.DefaultBaseItemCell;
-import org.apache.ctakes.core.cc.pretty.cell.DefaultUmlsItemCell;
-import org.apache.ctakes.core.cc.pretty.cell.ItemCell;
+import org.apache.ctakes.core.cc.pretty.cell.*;
 import org.apache.ctakes.core.cc.pretty.row.DefaultItemRow;
 import org.apache.ctakes.core.cc.pretty.row.ItemRow;
 import org.apache.ctakes.core.cc.pretty.textspan.DefaultTextSpan;
@@ -11,15 +9,19 @@ import org.apache.ctakes.core.cc.pretty.
 import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.apache.ctakes.core.util.IdentifiedAnnotationUtil;
 import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
 import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.log4j.Logger;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 
 import java.io.BufferedWriter;
 import java.io.File;
@@ -119,16 +121,13 @@ final public class PrettyTextWriter {
       }
       itemRows.add( baseItemRow );
       itemRows.addAll( createItemRows( coveringItemMap ) );
-      // Create list of all text span offsets
-      final Collection<Integer> offsets = new HashSet<>();
-      for ( TextSpan textSpan : baseItemMap.keySet() ) {
-         offsets.add( textSpan.getBegin() );
-         offsets.add( textSpan.getEnd() );
-      }
       // Create map of all text span offsets to adjusted offsets
-      final Map<Integer, Integer> offsetAdjustedMap = createOffsetAdjustedMap( offsets, itemRows );
+      final Map<Integer, Integer> offsetAdjustedMap = createOffsetAdjustedMap( itemRows );
       // print all of the item rows
       printItemRows( offsetAdjustedMap, itemRows, writer );
+
+      printTLinks( jcas, sentence, writer );
+      writer.newLine();
    }
 
    /**
@@ -159,33 +158,56 @@ final public class PrettyTextWriter {
       return baseItemMap;
    }
 
+
    /**
-    * @param jcas        ye olde ...
-    * @param sentence    annotation containing the sentence
+    *
+    * @param jcas CAS from which the IdentifiedAnnotations covered by the sentence are selected
+    * @param sentence annotation containing the sentence
     * @param baseItemMap map of text spans and item cells that represent those spans
-    * @return map of covering annotations (item cells that cover more than one base cell)
+    * @return map of number of spanned base items and collections of item cells spanning that number of base item cells
     */
    static private Map<Integer, Collection<ItemCell>> createCoveringItemMap( final JCas jcas,
                                                                             final AnnotationFS sentence,
                                                                             final Map<TextSpan, ItemCell> baseItemMap ) {
+      final Collection<TextSpan> usedTextSpans = new HashSet<>();
+      final Collection<ItemCell> requiredCells = new HashSet<>();
+      final Collection<ItemCell> eventCells = new HashSet<>();
+
       final int sentenceBegin = sentence.getBegin();
       final Collection<IdentifiedAnnotation> identifiedAnnotations
             = JCasUtil.selectCovered( jcas, IdentifiedAnnotation.class, sentence );
-      final Map<Integer, Collection<ItemCell>> coveringAnnotationMap = new HashMap<>();
-      for ( IdentifiedAnnotation identifiedAnnotation : identifiedAnnotations ) {
-         final Map<String, Collection<String>> semanticCuis = getSemanticCuis( identifiedAnnotation );
-         if ( semanticCuis.isEmpty() ) {
+      for ( IdentifiedAnnotation annotation : identifiedAnnotations ) {
+         final TextSpan textSpan = new DefaultTextSpan( annotation, sentenceBegin );
+         if ( textSpan.getWidth() == 0 ) {
             continue;
          }
-         final TextSpan textSpan = new DefaultTextSpan( identifiedAnnotation, sentenceBegin );
-         if ( textSpan.getWidth() == 0 ) {
+         final Map<String, Collection<String>> semanticCuis = getSemanticCuis( annotation );
+         if ( !semanticCuis.isEmpty() ) {
+            final ItemCell itemCell = new DefaultUmlsItemCell( textSpan, annotation.getPolarity(), semanticCuis );
+            requiredCells.add( itemCell );
+            usedTextSpans.add( textSpan );
+         } else if ( annotation instanceof TimeMention ) {
+            requiredCells.add( new TimexCell( textSpan ) );
+         } else if ( annotation instanceof EventMention ) {
+            eventCells.add( new EventCell( textSpan, annotation.getPolarity() ) );
+         }
+      }
+      final Map<Integer, Collection<ItemCell>> coveringAnnotationMap = new HashMap<>();
+      for ( ItemCell itemCell : requiredCells ) {
+         final Collection<ItemCell> coveredBaseItems = getCoveredBaseItems( itemCell.getTextSpan(), baseItemMap );
+         Collection<ItemCell> coveringAnnotations = coveringAnnotationMap.get( coveredBaseItems.size() );
+         if ( coveringAnnotations == null ) {
+            coveringAnnotations = new HashSet<>();
+            coveringAnnotationMap.put( coveredBaseItems.size(), coveringAnnotations );
+         }
+         coveringAnnotations.add( itemCell );
+      }
+      for ( ItemCell itemCell : eventCells ) {
+         if ( usedTextSpans.contains( itemCell.getTextSpan() ) ) {
             continue;
          }
-         final ItemCell itemCell = new DefaultUmlsItemCell( textSpan, identifiedAnnotation
-               .getPolarity(), semanticCuis );
-         final Collection<ItemCell> coveredBaseItems = getCoveredBaseItems( textSpan, baseItemMap );
-         Collection<ItemCell> coveringAnnotations
-               = coveringAnnotationMap.get( coveredBaseItems.size() );
+         final Collection<ItemCell> coveredBaseItems = getCoveredBaseItems( itemCell.getTextSpan(), baseItemMap );
+         Collection<ItemCell> coveringAnnotations = coveringAnnotationMap.get( coveredBaseItems.size() );
          if ( coveringAnnotations == null ) {
             coveringAnnotations = new HashSet<>();
             coveringAnnotationMap.put( coveredBaseItems.size(), coveringAnnotations );
@@ -195,13 +217,21 @@ final public class PrettyTextWriter {
       return coveringAnnotationMap;
    }
 
+
    /**
-    * @param offsets  original document offsets
     * @param itemRows item rows
     * @return map of original document offsets to adjusted printable offsets
     */
-   static private Map<Integer, Integer> createOffsetAdjustedMap( final Collection<Integer> offsets,
-                                                                 final Iterable<ItemRow> itemRows ) {
+   static private Map<Integer, Integer> createOffsetAdjustedMap( final Iterable<ItemRow> itemRows ) {
+      // Collect the set of begin and end offsets of every cell in every row.  Had to be here because BaseTokens did not contain all offsets.
+      final Collection<Integer> offsets = new HashSet<>();
+      for ( ItemRow itemRow : itemRows ) {
+         final Collection<ItemCell> rowItemCells = itemRow.getItemCells();
+         for ( ItemCell itemCell : rowItemCells ) {
+            offsets.add( itemCell.getTextSpan().getBegin() );
+            offsets.add( itemCell.getTextSpan().getEnd() );
+         }
+      }
       // Create map of all text span offsets to adjusted offsets
       final List<Integer> offsetList = new ArrayList<>( offsets );
       Collections.sort( offsetList );
@@ -251,16 +281,54 @@ final public class PrettyTextWriter {
             final String lineText = itemRow.getTextLine( i, rowWidth, offsetAdjustedMap );
             if ( !lineText.isEmpty() ) {
                if ( firstLine ) {
-                  writer.write( "TEXT:  " + lineText );
+                  writer.write( "SENTENCE:  " + lineText );
                   firstLine = false;
                } else {
-                  writer.write( "       " + lineText );
+                  writer.write( "           " + lineText );
 
                }
                writer.newLine();
             }
          }
       }
+   }
+
+
+   /**
+    * Writes one "TLINKS:" line of "arg1 category arg2" entries for relations whose first argument overlaps the sentence; writes nothing if none do
+    * @param jcas CAS from which all TemporalTextRelation annotations are selected
+    * @param sentence annotation whose begin and end offsets are used for the arg1 overlap test
+    * @param writer writer to which the TLINKS line and a following newline are written
+    * @throws IOException if the writer has issues
+    */
+   static private void printTLinks( final JCas jcas,
+                                    final AnnotationFS sentence,
+                                    final BufferedWriter writer ) throws IOException {
+      final Collection<TemporalTextRelation> tlinks = JCasUtil.select( jcas, TemporalTextRelation.class );
+      if ( tlinks == null || tlinks.isEmpty() ) {
+         return;
+      }
+      final Collection<TemporalTextRelation> sentenceTlinks = new ArrayList<>();
+      final TextSpan sentenceTextSpan = new DefaultTextSpan( sentence.getBegin(), sentence.getEnd() );
+      for ( TemporalTextRelation tlink : tlinks ) {
+         final Annotation argument1 = tlink.getArg1().getArgument(); // only arg1 is tested; arg2 may lie outside the sentence
+         final TextSpan argument1Span = new DefaultTextSpan( argument1, 0 ); // NOTE(review): offset 0 presumably keeps document offsets to match sentenceTextSpan - confirm
+         if ( sentenceTextSpan.overlaps( argument1Span ) ) {
+            sentenceTlinks.add( tlink );
+         }
+      }
+      if ( sentenceTlinks.isEmpty() ) {
+         return;
+      }
+      final StringBuilder sb = new StringBuilder();
+      sb.append( "TLINKS:    " );
+      for ( TemporalTextRelation tlink : sentenceTlinks ) {
+         sb.append( tlink.getArg1().getArgument().getCoveredText() ).append( " " );
+         sb.append( tlink.getCategory() ).append( " " );
+         sb.append( tlink.getArg2().getArgument().getCoveredText() ).append( " , " );
+      }
+      sb.setLength( sb.length() - 3 ); // drop the trailing " , " separator; safe because sentenceTlinks is non-empty here
+      writer.write( sb.toString() );
       writer.newLine();
    }