You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2020/04/24 20:06:58 UTC
svn commit: r1876941 [2/3] - in /ctakes/trunk:
ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cc/
ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/
ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/
ctakes-asserti...
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java Fri Apr 24 20:06:57 2020
@@ -2,8 +2,8 @@ package org.apache.ctakes.core.cc.proper
import org.apache.ctakes.core.cc.pretty.SemanticGroup;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.core.util.annotation.OntologyConceptUtil;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.typesystem.type.refsem.*;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.textsem.*;
@@ -69,7 +69,7 @@ public class PropertyTextWriter {
*/
public void process( final JCas jcas ) {
LOGGER.info( "Starting processing" );
- final String docId = DocumentIDAnnotationUtil.getDocumentIdForFile( jcas );
+ final String docId = DocIdUtil.getDocumentIdForFile( jcas );
File outputFile;
if ( _outputDirPath == null || _outputDirPath.isEmpty() ) {
outputFile = new File( docId + FILE_EXTENSION );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java Fri Apr 24 20:06:57 2020
@@ -6,7 +6,7 @@ import org.apache.ctakes.core.patient.Pa
import org.apache.ctakes.core.pipeline.ProgressManager;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.NumberedSuffixComparator;
-import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.core.util.doc.SourceMetadataUtil;
import org.apache.ctakes.typesystem.type.structured.DocumentID;
import org.apache.ctakes.typesystem.type.structured.DocumentIdPrefix;
import org.apache.ctakes.typesystem.type.structured.DocumentPath;
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java Fri Apr 24 20:06:57 2020
@@ -2,8 +2,8 @@ package org.apache.ctakes.core.cr;
import org.apache.ctakes.core.note.NoteSpecs;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
+import org.apache.ctakes.core.util.doc.SourceMetadataUtil;
import org.apache.ctakes.typesystem.type.structured.*;
import org.apache.uima.UIMAException;
import org.apache.uima.fit.factory.JCasFactory;
@@ -49,8 +49,8 @@ final public class JCasBuilder {
private String _encounterId = "";
// private int _encounterNum = -1;
- private String _docId = DocumentIDAnnotationUtil.NO_DOCUMENT_ID;
- private String _docIdPrefix = DocumentIDAnnotationUtil.NO_DOCUMENT_ID_PREFIX;
+ private String _docId = DocIdUtil.NO_DOCUMENT_ID;
+ private String _docIdPrefix = DocIdUtil.NO_DOCUMENT_ID_PREFIX;
private String _docType = NoteSpecs.ID_NAME_CLINICAL_NOTE;
private String _docSubType = "";
private String _docStandard = "";
@@ -258,13 +258,13 @@ final public class JCasBuilder {
sourceData.setSourceInstanceId( _instanceId );
}
- if ( ifWrite( _docId, DocumentIDAnnotationUtil.NO_DOCUMENT_ID ) ) {
+ if ( ifWrite( _docId, DocIdUtil.NO_DOCUMENT_ID ) ) {
final DocumentID documentId = new DocumentID( jCas );
documentId.setDocumentID( _docId );
documentId.addToIndexes();
}
- if ( ifWrite( _docIdPrefix, DocumentIDAnnotationUtil.NO_DOCUMENT_ID_PREFIX ) ) {
+ if ( ifWrite( _docIdPrefix, DocIdUtil.NO_DOCUMENT_ID_PREFIX ) ) {
final DocumentIdPrefix documentIdPrefix = new DocumentIdPrefix( jCas );
documentIdPrefix.setDocumentIdPrefix( _docIdPrefix );
documentIdPrefix.addToIndexes();
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/note/NoteSpecs.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/note/NoteSpecs.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/note/NoteSpecs.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/note/NoteSpecs.java Fri Apr 24 20:06:57 2020
@@ -1,8 +1,8 @@
package org.apache.ctakes.core.note;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
+import org.apache.ctakes.core.util.doc.SourceMetadataUtil;
import org.apache.ctakes.typesystem.type.structured.SourceData;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.log4j.Logger;
@@ -17,8 +17,8 @@ import java.util.Date;
import java.util.Objects;
import java.util.stream.Collectors;
-import static org.apache.ctakes.core.util.DocumentIDAnnotationUtil.NO_DOCUMENT_ID_PREFIX;
-import static org.apache.ctakes.core.util.SourceMetadataUtil.UNKNOWN_PATIENT;
+import static org.apache.ctakes.core.util.doc.DocIdUtil.NO_DOCUMENT_ID_PREFIX;
+import static org.apache.ctakes.core.util.doc.SourceMetadataUtil.UNKNOWN_PATIENT;
/**
* Contains information on a note. This is information can exist beyond the life of a cas.
@@ -55,7 +55,7 @@ final public class NoteSpecs {
public NoteSpecs( final JCas jCas ) {
final SourceData sourceData = SourceMetadataUtil.getSourceData( jCas );
_noteDate = createNoteDate( sourceData );
- _documentId = DocumentIDAnnotationUtil.getDocumentID( jCas );
+ _documentId = DocIdUtil.getDocumentID( jCas );
_documentType = createDocumentType( sourceData );
_documentText = jCas.getDocumentText();
_patientName = createPatientName( jCas, _documentId );
@@ -136,7 +136,7 @@ final public class NoteSpecs {
if ( patientId != null && !patientId.isEmpty() && !patientId.equals( UNKNOWN_PATIENT ) ) {
return patientId;
}
- final String idPrefix = DocumentIDAnnotationUtil.getDocumentIdPrefix( jCas );
+ final String idPrefix = DocIdUtil.getDocumentIdPrefix( jCas );
if ( idPrefix != null && !idPrefix.isEmpty() && !idPrefix.equals( NO_DOCUMENT_ID_PREFIX ) ) {
return idPrefix;
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java Fri Apr 24 20:06:57 2020
@@ -2,7 +2,7 @@ package org.apache.ctakes.core.patient;
import org.apache.ctakes.core.ae.NamedEngine;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
-import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.core.util.doc.SourceMetadataUtil;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/PatientNoteStore.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/PatientNoteStore.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/PatientNoteStore.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/PatientNoteStore.java Fri Apr 24 20:06:57 2020
@@ -2,8 +2,8 @@ package org.apache.ctakes.core.patient;
import org.apache.ctakes.core.ae.NamedEngine;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
+import org.apache.ctakes.core.util.doc.SourceMetadataUtil;
import org.apache.ctakes.typesystem.type.structured.DocumentIdPrefix;
import org.apache.ctakes.typesystem.type.structured.Metadata;
import org.apache.log4j.Logger;
@@ -163,7 +163,7 @@ public enum PatientNoteStore {
* If it has been set in the document metadata then that is used,
* otherwise it will come from the document's parent directory.
* @see SourceMetadataUtil#getPatientIdentifier(JCas)
- * @see DocumentIDAnnotationUtil#getDocumentIdPrefix(JCas)
+ * @see DocIdUtil#getDocumentIdPrefix(JCas)
*/
static public String getDefaultPatientId( final JCas viewCas ) {
final String patientIdentifier = SourceMetadataUtil.getPatientIdentifier( viewCas );
@@ -175,10 +175,10 @@ public enum PatientNoteStore {
/**
* @return the default identifier for a view of the document.
- * @see DocumentIDAnnotationUtil#getDocumentID(JCas)
+ * @see DocIdUtil#getDocumentID(JCas)
*/
static public String getDefaultDocumentId( final JCas viewCas ) {
- return DocumentIDAnnotationUtil.getDocumentID( viewCas );
+ return DocIdUtil.getDocumentID( viewCas );
}
///////////////// Store Views ///////////////
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java Fri Apr 24 20:06:57 2020
@@ -1,8 +1,8 @@
package org.apache.ctakes.core.pipeline;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.core.util.annotation.OntologyConceptUtil;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.log4j.Logger;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;
@@ -131,7 +131,7 @@ public enum CuiCollector {
@Override
public void process( final JCas jCas ) {
LOGGER.info( "Starting processing" );
- final String id = DocumentIDAnnotationUtil.getDeepDocumentId( jCas );
+ final String id = DocIdUtil.getDeepDocumentId( jCas );
final Map<String, Long> cuiCounts = OntologyConceptUtil.getCuiCounts( jCas );
CuiCollector.getInstance()._cuiCountMap.put( id, cuiCounts );
LOGGER.info( "Finished processing" );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java Fri Apr 24 20:06:57 2020
@@ -1,7 +1,7 @@
package org.apache.ctakes.core.pipeline;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.log4j.Logger;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
@@ -138,7 +138,7 @@ public enum EntityCollector {
@Override
public void process( final JCas jCas ) {
LOGGER.info( "Starting processing" );
- final String id = DocumentIDAnnotationUtil.getDeepDocumentId( jCas );
+ final String id = DocIdUtil.getDeepDocumentId( jCas );
final Collection<IdentifiedAnnotation> annotations = JCasUtil.select( jCas, IdentifiedAnnotation.class );
putEntities( id, annotations );
LOGGER.info( "Finished processing" );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java Fri Apr 24 20:06:57 2020
@@ -3,7 +3,7 @@ package org.apache.ctakes.core.pipeline;
import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.util.DotLogger;
+import org.apache.ctakes.core.util.log.DotLogger;
import org.apache.log4j.Logger;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/semantic/SemanticTui.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/semantic/SemanticTui.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/semantic/SemanticTui.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/semantic/SemanticTui.java Fri Apr 24 20:06:57 2020
@@ -1,6 +1,6 @@
package org.apache.ctakes.core.semantic;
-import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.core.util.annotation.OntologyConceptUtil;
import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CalendarUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CalendarUtil.java?rev=1876941&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CalendarUtil.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CalendarUtil.java Fri Apr 24 20:06:57 2020
@@ -0,0 +1,308 @@
+package org.apache.ctakes.core.util;
+
+
+import org.apache.ctakes.typesystem.type.refsem.Date;
+import org.apache.ctakes.typesystem.type.refsem.Time;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import java.security.InvalidParameterException;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.*;
+
+import static java.util.Calendar.*;
+import static org.apache.ctakes.typesystem.type.constants.CONST.TIME_CLASS_DATE;
+
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/8/2019
+ */
+final public class CalendarUtil {
+
+ private CalendarUtil() {
+ }
+
+ static public final Calendar NULL_CALENDAR = new Builder().setDate( 1, 1, 1 ).build();
+
+ static private final Collection<String> DATE_FORMAT_CODES = Arrays.asList(
+ "M-d-yy",
+ "MM-dd-yy",
+ "MM-dd-yyyy",
+ "M/d/yy",
+ "MM/dd/yy",
+ "MM/dd/yyyy",
+ "MMM d",
+ "MMM yyyy",
+ "MMM d yyyy",
+ "dd-MMM-yy",
+ "dd-MMM-yyyy"
+ );
+
+ static private final Collection<String> TIME_FORMAT_CODES = Arrays.asList(
+ "h:mm",
+ "h:mm a",
+ "h:mm:ss",
+ "h:mm:ss.SSS",
+ "h:mm z",
+ "h:mm a z",
+ "h:mm a, z",
+ "h a",
+ "h 'o''clock'",
+ "h 'o''clock' a"
+ );
+
+
+ static private final Collection<DateFormat> DATE_FORMATS = new ArrayList<>();
+ static private final Collection<DateFormat> DATE_TIME_FORMATS = new ArrayList<>();
+
+ static {
+ for ( String date : DATE_FORMAT_CODES ) {
+ DATE_FORMATS.add( new SimpleDateFormat( date ) );
+ DATE_FORMATS.add( new SimpleDateFormat( "EEE " + date ) );
+ DATE_FORMATS.add( new SimpleDateFormat( "EEE, " + date ) );
+ for ( String time : TIME_FORMAT_CODES ) {
+ DATE_TIME_FORMATS.add( new SimpleDateFormat( date + " " + time ) );
+ DATE_TIME_FORMATS.add( new SimpleDateFormat( date + " 'at' " + time ) );
+ DATE_TIME_FORMATS.add( new SimpleDateFormat( "EEE " + date + " " + time ) );
+ DATE_TIME_FORMATS.add( new SimpleDateFormat( "EEE, " + date + " 'at' " + time ) );
+
+ DATE_TIME_FORMATS.add( new SimpleDateFormat( time + " " + date ) );
+ DATE_TIME_FORMATS.add( new SimpleDateFormat( time + " 'on' " + date ) );
+ DATE_TIME_FORMATS.add( new SimpleDateFormat( time + ", EEE " + date ) );
+ DATE_TIME_FORMATS.add( new SimpleDateFormat( time + " 'on' EEE " + date ) );
+ DATE_TIME_FORMATS.add( new SimpleDateFormat( time + " 'on' EEE, " + date ) );
+ }
+ }
+ }
+
+
+ /**
+ * @param jCas ye olde ...
+ * @param begin begin index in doc text
+ * @param end end index in doc text
+ * @return TimeMention with normalized date and time and character offsets
+ */
+ static public TimeMention createTimeMention( final JCas jCas,
+ final int begin,
+ final int end ) {
+ final String docText = jCas.getDocumentText();
+ if ( begin < 0 || end > docText.length() || begin >= end ) {
+ throw new InvalidParameterException( "Offsets " + begin + "," + end
+ + " are outside document bounds 0," + docText.length() );
+ }
+ final String text = docText.substring( begin, end );
+ final TimeMention timeMention = createTimeMention( jCas, text );
+ timeMention.setBegin( begin );
+ timeMention.setEnd( end );
+ return timeMention;
+ }
+
+ /**
+ * @param jCas ye olde ...
+ * @param begin begin index in doc text
+ * @param end end index in doc text
+ * @param calendar some calendar with actual date information
+ * @return TimeMention with normalized date and time and character offsets
+ */
+ static public TimeMention createTimeMention( final JCas jCas,
+ final int begin,
+ final int end,
+ final Calendar calendar ) {
+ final String docText = jCas.getDocumentText();
+ if ( begin < 0 || end > docText.length() || begin >= end ) {
+ throw new InvalidParameterException( "Offsets " + begin + "," + end
+ + " are outside document bounds 0," + docText.length() );
+ }
+ final TimeMention timeMention = createTimeMention( jCas, calendar );
+ timeMention.setBegin( begin );
+ timeMention.setEnd( end );
+ return timeMention;
+ }
+
+ /**
+ * @param jCas ye olde ...
+ * @param text doc text
+ * @return TimeMention with normalized date and time and no character offsets
+ */
+ static public TimeMention createTimeMention( final JCas jCas,
+ final String text ) {
+ return createTimeMention( jCas, getCalendar( text ) );
+ }
+
+ /**
+ * @param jCas ye olde ...
+ * @param calendar some calendar with actual date information
+ * @return TimeMention with normalized date and time and no character offsets
+ */
+ static public TimeMention createTimeMention( final JCas jCas,
+ final Calendar calendar ) {
+ final Date date = createTypeDate( jCas, calendar );
+ final Time time = createTypeTime( jCas, calendar );
+ final TimeMention timeMention = new TimeMention( jCas );
+ timeMention.setDate( date );
+ timeMention.setTime( time );
+ // Right now there is only one time class.
+ timeMention.setTimeClass( TIME_CLASS_DATE );
+ return timeMention;
+ }
+
+ /**
+ * @param jCas ye olde ...
+ * @param text some text representing actual date information
+ * @return Type System Date with filled day, month, year values
+ */
+ static public Date createTypeDate( final JCas jCas, final String text ) {
+ final Calendar calendar = getCalendar( text );
+ return createTypeDate( jCas, calendar );
+ }
+
+ /**
+ * @param jCas ye olde ...
+ * @param calendar some calendar with actual date information
+ * @return Type System Date with filled day, month, year values
+ */
+ static public Date createTypeDate( final JCas jCas, final Calendar calendar ) {
+ final Date date = new Date( jCas );
+ date.setDay( "" + calendar.get( DAY_OF_MONTH ) );
+ date.setMonth( "" + (calendar.get( Calendar.MONTH ) + 1) );
+ date.setYear( "" + calendar.get( Calendar.YEAR ) );
+ return date;
+ }
+
+ /**
+ * @param jCas ye olde ...
+ * @param text some text representing actual time information
+ * @return Type System Time with normalized form "hour:minute:second am_pm" built from the calendar parsed from text
+ */
+ static public Time createTypeTime( final JCas jCas, final String text ) {
+ final Calendar calendar = getCalendar( text );
+ return createTypeTime( jCas, calendar );
+ }
+
+ /**
+ * @param jCas ye olde ...
+ * @param calendar some calendar with actual time information
+ * @return Type System Time with normalized form "hour:minute:second am_pm" built from the calendar's HOUR_OF_DAY, MINUTE, SECOND and AM_PM fields
+ */
+ static public Time createTypeTime( final JCas jCas, final Calendar calendar ) {
+ final Time time = new Time( jCas );
+ time.setNormalizedForm( calendar.get( HOUR_OF_DAY )
+ + ":" + calendar.get( MINUTE )
+ + ":" + calendar.get( SECOND )
+ + " " + calendar.get( AM_PM ) );
+ return time;
+ }
+
+ static public Calendar getCalendar( final Annotation annotation ) {
+ if ( annotation instanceof TimeMention ) {
+ return getTimeMentionCalendar( (TimeMention)annotation );
+ }
+ return getCalendar( annotation.getCoveredText() );
+ }
+
+ /**
+ * @param text -
+ * @return Calendar parsed from text, or {@link #NULL_CALENDAR}.
+ */
+ static public Calendar getCalendar( final String text ) {
+ final Calendar calendar = Calendar.getInstance();
+ for ( DateFormat format : DATE_FORMATS ) {
+ try {
+ java.util.Date date = format.parse( text );
+ calendar.setTime( date );
+ return calendar;
+ } catch ( ParseException pE ) {
+ // Continue
+ }
+ }
+ for ( DateFormat format : DATE_TIME_FORMATS ) {
+ try {
+ java.util.Date date = format.parse( text );
+ calendar.setTime( date );
+ return calendar;
+ } catch ( ParseException pE ) {
+ // Continue
+ }
+ }
+ return NULL_CALENDAR;
+ }
+
+ /**
+ * @param timeMention -
+ * @return Calendar created using preset date information in the TimeMention or its covered text, or {@link #NULL_CALENDAR}.
+ */
+ static private Calendar getTimeMentionCalendar( final TimeMention timeMention ) {
+ if ( timeMention == null ) {
+ return NULL_CALENDAR;
+ }
+ final Date typeDate = timeMention.getDate();
+ final Calendar typeCalendar = getCalendar( typeDate );
+ if ( !NULL_CALENDAR.equals( typeCalendar ) ) {
+ return typeCalendar;
+ }
+ return CalendarUtil.getCalendar( timeMention.getCoveredText() );
+ }
+
+ /**
+ * @param typeDate Type System Date, usually in a {@link TimeMention}.
+ * @return Calendar created using preset date information, or {@link #NULL_CALENDAR}.
+ */
+ static private Calendar getCalendar( final Date typeDate ) {
+ if ( typeDate == null ) {
+ return NULL_CALENDAR;
+ }
+ final int year = CalendarUtil.parseInt( typeDate.getYear() );
+ final int month = CalendarUtil.parseInt( typeDate.getMonth() );
+ final int day = CalendarUtil.parseInt( typeDate.getDay() );
+ if ( year == Integer.MIN_VALUE && month == Integer.MIN_VALUE && day == Integer.MIN_VALUE ) {
+ return NULL_CALENDAR;
+ }
+ final List<Integer> fields = new ArrayList<>( 6 );
+ if ( year != Integer.MIN_VALUE ) {
+ fields.add( Calendar.YEAR );
+ fields.add( year );
+ }
+ if ( month != Integer.MIN_VALUE ) {
+ fields.add( Calendar.MONTH );
+ fields.add( month - 1 );
+ }
+ if ( day != Integer.MIN_VALUE ) {
+ fields.add( Calendar.DAY_OF_MONTH );
+ fields.add( day );
+ }
+ final int[] array = new int[ fields.size() ];
+ for ( int i = 0; i < array.length; i++ ) {
+ array[ i ] = fields.get( i );
+ }
+ return new Builder().setFields( array ).build();
+ }
+
+
+ /**
+ * @param text -
+ * @return non-negative int value of all-digit text or {@link Integer#MIN_VALUE} if not possible.
+ */
+ static private int parseInt( final String text ) {
+ if ( text == null || text.isEmpty() ) {
+ return Integer.MIN_VALUE;
+ }
+ for ( char c : text.toCharArray() ) {
+ if ( !Character.isDigit( c ) ) {
+ return Integer.MIN_VALUE;
+ }
+ }
+ try {
+ return Integer.parseInt( text );
+ } catch ( NumberFormatException nfE ) {
+ return Integer.MIN_VALUE;
+ }
+ }
+
+
+}
+
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java Fri Apr 24 20:06:57 2020
@@ -19,6 +19,7 @@ package org.apache.ctakes.core.util;
* under the License.
*/
+import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.component.initialize.ConfigurationParameterInitializer;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
@@ -54,7 +55,7 @@ public class CtakesFileNamer implements
int i = 1;
public String nameFile( JCas jcas ) {
- String sourceFileName = DocumentIDAnnotationUtil.getDocumentID( jcas );
+ String sourceFileName = DocIdUtil.getDocumentID( jcas );
StringBuilder b = new StringBuilder();
if ( prefix != null && !prefix.isEmpty() ) {
b.append( prefix );
@@ -62,7 +63,7 @@ public class CtakesFileNamer implements
if ( sourceFileName != null
&& !sourceFileName.isEmpty()
- && !sourceFileName.equals( DocumentIDAnnotationUtil.NO_DOCUMENT_ID ) ) {
+ && !sourceFileName.equals( DocIdUtil.NO_DOCUMENT_ID ) ) {
b.append( sourceFileName );
} else {
b.append( i++ );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DateParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DateParser.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DateParser.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DateParser.java Fri Apr 24 20:06:57 2020
@@ -18,16 +18,19 @@
*/
package org.apache.ctakes.core.util;
+import org.apache.ctakes.typesystem.type.refsem.Date;
+import org.apache.uima.jcas.JCas;
+
import java.text.DateFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
-import org.apache.uima.jcas.JCas;
-
-import org.apache.ctakes.typesystem.type.refsem.Date;
-
+/**
+ * @deprecated use core.util CalendarUtil
+ */
+@Deprecated
public class DateParser {
private static DateFormat df = DateFormat.getDateInstance();
@@ -74,6 +77,7 @@ public class DateParser {
/**
* First try parsing full date (month, day and year) using java.util.Date
* If that fails, try extracting at least part of the date
+ * @deprecated use core.util CalendarUtil
*/
public static Date parse(JCas jcas, String dateString) {
Date date = new Date(jcas);
@@ -130,6 +134,7 @@ public class DateParser {
/**
*
* @return if entire string is letters, returns length of s
+ * @deprecated use core.util CalendarUtil
*/
public static int getIndexFirstNonLetter(String s) {
for (int i=0; i<s.length(); i++) {
@@ -143,6 +148,7 @@ public class DateParser {
/**
*
* @return if entire string is letters, returns 0
+ * @deprecated use core.util CalendarUtil
*/
public static int getIndexAfterLastNonDigit(String s) {
for (int i=s.length(); i>0 ;) {
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/EssentialAnnotationUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/EssentialAnnotationUtil.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/EssentialAnnotationUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/EssentialAnnotationUtil.java Fri Apr 24 20:06:57 2020
@@ -1,448 +1,92 @@
package org.apache.ctakes.core.util;
-import org.apache.ctakes.core.util.textspan.TextSpan;
-import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
-import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
-import org.apache.ctakes.typesystem.type.textsem.*;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
-import org.apache.log4j.Logger;
-import org.apache.uima.fit.util.FSCollectionFactory;
-import org.apache.uima.fit.util.JCasUtil;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.FSList;
-import org.apache.uima.jcas.tcas.Annotation;
-import java.util.*;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
/**
* @author SPF , chip-nlp
* @version %I%
* @since 12/29/2017
+ * @deprecated Use EssentialAnnotationUtil in (sub) package annotation
*/
+@Deprecated
final public class EssentialAnnotationUtil {
- static private final Logger LOGGER = Logger.getLogger( "EssentialAnnotationUtil" );
-
private EssentialAnnotationUtil() {
}
- static private final Pattern N_DOT_PATTERN = Pattern.compile( "N..?" );
-
- static private final java.util.function.Predicate<Annotation> ESSENTIALS
- = a -> EventMention.class.isInstance( a )
- || TimeMention.class.isInstance( a )
- || EntityMention.class.isInstance( a );
-
- static private Collection<IdentifiedAnnotation> getEssentialAnnotations( final Collection<IdentifiedAnnotation> annotations ) {
- return annotations.stream()
- .filter( ESSENTIALS )
- .collect( Collectors.toList() );
- }
-
- static private void cullToEssentialAnnotations( final Collection<Collection<IdentifiedAnnotation>> annotationCollections ) {
- final Collection<IdentifiedAnnotation> keepers = new HashSet<>();
- for ( Collection<IdentifiedAnnotation> annotations : annotationCollections ) {
- annotations.stream()
- .filter( ESSENTIALS )
- .forEach( keepers::add );
- annotations.retainAll( keepers );
- keepers.clear();
- }
- }
-
- static private Collection<IdentifiedAnnotation> getNonEssentialAnnotations( final Collection<IdentifiedAnnotation> allAnnotations,
- final Collection<IdentifiedAnnotation> essentialAnnotations ) {
- return allAnnotations.stream()
- .filter( a -> !essentialAnnotations.contains( a ) )
- .filter( a -> !Markable.class.isInstance( a ) )
- .collect( Collectors.toList() );
- }
-
-
+ @Deprecated
static public Collection<IdentifiedAnnotation> getRequiredAnnotations( final JCas jCas,
final Map<IdentifiedAnnotation, Collection<Integer>> corefIndexed ) {
- return getRequiredAnnotations( jCas, JCasUtil.select( jCas, IdentifiedAnnotation.class ), corefIndexed );
+ return org.apache.ctakes.core.util.annotation.EssentialAnnotationUtil.getRequiredAnnotations( jCas, corefIndexed );
}
+ @Deprecated
static public Collection<IdentifiedAnnotation> getRequiredAnnotations( final JCas jCas,
final Collection<IdentifiedAnnotation> allAnnotations,
final Map<IdentifiedAnnotation, Collection<Integer>> corefIndexed ) {
- return getRequiredAnnotations( allAnnotations, corefIndexed, JCasUtil.select( jCas, BinaryTextRelation.class ) );
+ return org.apache.ctakes.core.util.annotation.EssentialAnnotationUtil.getRequiredAnnotations( jCas, allAnnotations, corefIndexed );
}
+ @Deprecated
static public Collection<IdentifiedAnnotation> getRequiredAnnotations( final Collection<IdentifiedAnnotation> allAnnotations,
final Map<IdentifiedAnnotation, Collection<Integer>> corefIndexed,
final Collection<BinaryTextRelation> relations ) {
- final Collection<IdentifiedAnnotation> essentialAnnotations = getEssentialAnnotations( allAnnotations );
- // Collection of annotations required to cover all umls annotations, relations, coreferences
- final Collection<IdentifiedAnnotation> requiredAnnotations = new HashSet<>( essentialAnnotations );
- requiredAnnotations.addAll( corefIndexed.keySet() );
- requiredAnnotations.addAll( getRelationAnnotations( relations ) );
- return requiredAnnotations;
+ return org.apache.ctakes.core.util.annotation.EssentialAnnotationUtil.getRequiredAnnotations( allAnnotations, corefIndexed, relations );
}
/**
* @param jCas ye olde ...
* @return a map of markables to indexed chain numbers
*/
+ @Deprecated
static public Map<IdentifiedAnnotation, Collection<Integer>> createMarkableCorefs( final JCas jCas ) {
- final Collection<CollectionTextRelation> corefs = JCasUtil.select( jCas, CollectionTextRelation.class );
- final Map<Markable, IdentifiedAnnotation> markableAnnotations = mapMarkableAnnotations( jCas, corefs );
- return createMarkableCorefs( corefs, markableAnnotations );
+ return org.apache.ctakes.core.util.annotation.EssentialAnnotationUtil.createMarkableCorefs( jCas );
}
/**
* @param corefs coreference chains
* @return a map of markables to indexed chain numbers
*/
+ @Deprecated
static public Map<IdentifiedAnnotation, Collection<Integer>> createMarkableCorefs(
final Collection<CollectionTextRelation> corefs,
final Map<Markable, IdentifiedAnnotation> markableAnnotations ) {
- if ( corefs == null || corefs.isEmpty() ) {
- return Collections.emptyMap();
- }
- final Map<IdentifiedAnnotation, Collection<Integer>> corefMarkables = new HashMap<>();
- int index = 1;
- for ( CollectionTextRelation coref : corefs ) {
- final FSList chainHead = coref.getMembers();
- final Collection<Markable> markables = FSCollectionFactory.create( chainHead, Markable.class );
- for ( Markable markable : markables ) {
- final IdentifiedAnnotation annotation = markableAnnotations.get( markable );
- corefMarkables.putIfAbsent( annotation, new ArrayList<>() );
- corefMarkables.get( annotation )
- .add( index );
- }
- index++;
- }
- return corefMarkables;
+ return org.apache.ctakes.core.util.annotation.EssentialAnnotationUtil.createMarkableCorefs( corefs, markableAnnotations );
}
/**
* @param corefs coreference chains
* @return a map of markables to indexed chain numbers
*/
+ @Deprecated
static public Map<IdentifiedAnnotation, Collection<Integer>> createMarkableAssertedCorefs(
final Collection<CollectionTextRelation> corefs,
final Map<Markable, IdentifiedAnnotation> markableAnnotations ) {
- if ( corefs == null || corefs.isEmpty() ) {
- return Collections.emptyMap();
- }
-
- final List<List<IdentifiedAnnotation>> chains = new ArrayList<>();
- for ( CollectionTextRelation coref : corefs ) {
- final Map<String, List<IdentifiedAnnotation>> assertionMap = new HashMap<>();
- final FSList chainHead = coref.getMembers();
- final Collection<Markable> markables = FSCollectionFactory.create( chainHead, Markable.class );
- for ( Markable markable : markables ) {
- final IdentifiedAnnotation annotation = markableAnnotations.get( markable );
- final String assertion = getAssertion( annotation );
- assertionMap.computeIfAbsent( assertion, a -> new ArrayList<>() ).add( annotation );
- }
- for ( List<IdentifiedAnnotation> asserted : assertionMap.values() ) {
- if ( asserted.size() > 1 ) {
- asserted.sort( Comparator.comparingInt( Annotation::getBegin ) );
- chains.add( asserted );
- }
- }
- }
- chains.sort( ( l1, l2 ) -> l1.get( 0 ).getBegin() - l2.get( 0 ).getBegin() );
-
- final Map<IdentifiedAnnotation, Collection<Integer>> corefMarkables = new HashMap<>();
- int index = 1;
- for ( Collection<IdentifiedAnnotation> chain : chains ) {
- for ( IdentifiedAnnotation annotation : chain ) {
- corefMarkables.computeIfAbsent( annotation, a -> new ArrayList<>() ).add( index );
- }
- index++;
- }
- return corefMarkables;
- }
-
- static private String getAssertion( final IdentifiedAnnotation annotation ) {
- final StringBuilder sb = new StringBuilder();
- if ( annotation.getPolarity() == CONST.NE_POLARITY_NEGATION_PRESENT ) {
- sb.append( "AFFIRMED" );
- } else {
- sb.append( "NEGATED" );
- }
- if ( annotation.getUncertainty() == CONST.NE_UNCERTAINTY_PRESENT ) {
- sb.append( "UNCERTAIN" );
- }
- if ( annotation.getGeneric() ) {
- sb.append( "GENERIC" );
- }
- if ( annotation.getConditional() ) {
- sb.append( "CONDITIONAL" );
- }
- return sb.toString();
- }
-
- /**
- * This is a bit messy, but necessary.
- *
- * @param jCas -
- * @param corefs -
- * @return map of markable to identified annotation
- */
- static private Map<Markable, IdentifiedAnnotation> mapMarkableAnnotations(
- final JCas jCas, final Collection<CollectionTextRelation> corefs ) {
- if ( corefs == null || corefs.isEmpty() ) {
- return Collections.emptyMap();
- }
- final Map<Markable, Collection<ConllDependencyNode>> markableNodes
- = JCasUtil.indexCovered( jCas, Markable.class, ConllDependencyNode.class );
- final Map<ConllDependencyNode, Collection<IdentifiedAnnotation>> nodeAnnotations
- = JCasUtil.indexCovering( jCas, ConllDependencyNode.class, IdentifiedAnnotation.class );
- final Map<Markable, IdentifiedAnnotation> annotationMap = new HashMap<>();
- for ( CollectionTextRelation coref : corefs ) {
- final Collection<Markable> markables = JCasUtil.select( coref.getMembers(), Markable.class );
- for ( Markable markable : markables ) {
- final Collection<ConllDependencyNode> nodes = markableNodes.get( markable );
- if ( nodes == null || nodes.isEmpty() ) {
- continue;
- }
- final ConllDependencyNode headNode = getNominalHeadNode( new ArrayList<>( nodes ) );
- final Collection<IdentifiedAnnotation> headNodeAnnotations = nodeAnnotations.get( headNode );
- final Collection<IdentifiedAnnotation> essentialAnnotations = getEssentialAnnotations( headNodeAnnotations );
- final Collection<IdentifiedAnnotation> nonEssentialAnnotations = getNonEssentialAnnotations( headNodeAnnotations,
- essentialAnnotations );
-
- IdentifiedAnnotation bestAnnotation = null;
- int bestLength = Integer.MAX_VALUE;
- for ( IdentifiedAnnotation annotation : essentialAnnotations ) {
- if ( !EventMention.class.equals( annotation.getClass() )
- && annotation.getBegin() == markable.getBegin()
- && annotation.getEnd() == markable.getEnd() ) {
- // Prefer an exact non-event match over the longest match
- bestAnnotation = annotation;
- break;
- }
- if ( annotation.getEnd() - annotation.getBegin() < bestLength ) {
- bestLength = annotation.getEnd() - annotation.getBegin();
- bestAnnotation = annotation;
- }
- }
- if ( bestAnnotation != null ) {
- annotationMap.put( markable, bestAnnotation );
- continue;
- }
- for ( IdentifiedAnnotation annotation : nonEssentialAnnotations ) {
- if ( annotation.getEnd() - annotation.getBegin() < bestLength ) {
- bestLength = annotation.getEnd() - annotation.getBegin();
- bestAnnotation = annotation;
- }
- }
- if ( bestAnnotation != null ) {
- annotationMap.put( markable, bestAnnotation );
- } else {
- annotationMap.put( markable, markable );
- }
- }
- }
- return annotationMap;
+ return org.apache.ctakes.core.util.annotation.EssentialAnnotationUtil.createMarkableAssertedCorefs( corefs, markableAnnotations );
}
/**
* Finds the head node out of a few ConllDependencyNodes. Biased toward nouns.
**/
+ @Deprecated
static public ConllDependencyNode getNominalHeadNode( final List<ConllDependencyNode> nodes ) {
- final ArrayList<ConllDependencyNode> anodes = new ArrayList<>( nodes );
- final Boolean[][] matrixofheads = new Boolean[ anodes.size() ][ anodes.size() ];
- final List<ConllDependencyNode> outnodes = new ArrayList<>();
-
- // Remove root from consideration
- for ( int i = 0; i < anodes.size(); i++ ) {
- if ( anodes.get( i )
- .getId() == 0 ) {
- anodes.remove( i );
- }
- }
-
- // Create a dependency matrix
- for ( int id1 = 0; id1 < anodes.size(); id1++ ) {
- for ( int id2 = 0; id2 < anodes.size(); id2++ ) {
- // no head-dependency relationship between id1 and id2
- matrixofheads[ id2 ][ id1 ]
- = id1 != id2
- && anodes.get( id2 ).getHead() != null
- && anodes.get( id1 ).getId() == anodes.get( id2 ).getHead().getId();
- }
- }
-
- // Search the dependency matrix for the head
- for ( int idhd = 0; idhd < anodes.size(); idhd++ ) {
- boolean occupiedCol = false;
- for ( int row = 0; row < anodes.size(); row++ ) {
- if ( matrixofheads[ row ][ idhd ] ) {
- occupiedCol = true;
- }
- }
- if ( occupiedCol ) {
- boolean occupiedRow = false;
- for ( int col = 0; col < anodes.size(); col++ ) {
- if ( matrixofheads[ idhd ][ col ] ) {
- occupiedRow = true;
- }
- }
- if ( !occupiedRow ) {
- outnodes.add( anodes.get( idhd ) );
- }
- }
- }
-
- // Unheaded phrases
- if ( outnodes.isEmpty() ) {
- // pick a noun from the left, if there is one
- for ( int i = 0; i < anodes.size(); i++ ) {
- if ( anodes.get( i ) != null && anodes.get( i ).getPostag() != null
- && N_DOT_PATTERN.matcher( anodes.get( i ).getPostag() ).matches() ) {
- return anodes.get( i );
- }
- }
- // default to picking the rightmost node
- return anodes.get( anodes.size() - 1 );
- }
- // Headed phrases
- else {
- // pick a noun from the left, if there is one
- for ( int i = 0; i < outnodes.size(); i++ ) {
- if ( outnodes.get( i ) != null && outnodes.get( i ).getPostag() != null
- && N_DOT_PATTERN.matcher( outnodes.get( i ).getPostag() ).matches() ) {
- return outnodes.get( i );
- }
- }
- // otherwise, pick the rightmost node with dependencies
- return outnodes.get( outnodes.size() - 1 );
- }
+ return org.apache.ctakes.core.util.annotation.EssentialAnnotationUtil.getNominalHeadNode( nodes );
}
-
+ @Deprecated
static public Collection<IdentifiedAnnotation> getRelationAnnotations(
final Collection<BinaryTextRelation> relations ) {
- final Collection<IdentifiedAnnotation> relationAnnotations = new HashSet<>();
- for ( BinaryTextRelation relation : relations ) {
- IdentifiedAnnotation sourceIA;
- IdentifiedAnnotation targetIA;
- final RelationArgument arg1 = relation.getArg1();
- final org.apache.uima.jcas.tcas.Annotation source = arg1.getArgument();
- if ( source instanceof IdentifiedAnnotation ) {
- sourceIA = (IdentifiedAnnotation) source;
- } else {
- LOGGER.error( "Relation source is not an IdentifiedAnnotation " + source.getCoveredText() );
- continue;
- }
- final RelationArgument arg2 = relation.getArg2();
- final org.apache.uima.jcas.tcas.Annotation target = arg2.getArgument();
- if ( target instanceof IdentifiedAnnotation ) {
- targetIA = (IdentifiedAnnotation) target;
- } else {
- LOGGER.error( "Relation target is not an IdentifiedAnnotation " + source.getCoveredText() );
- continue;
- }
- relationAnnotations.add( sourceIA );
- relationAnnotations.add( targetIA );
- }
- return relationAnnotations;
- }
-
-
- // The assumption is that any given span can only have one exact EventMention.
- static private Collection<IdentifiedAnnotation> getEventMentions(
- final Collection<IdentifiedAnnotation> annotations ) {
- return annotations.stream()
- .filter( a -> EventMention.class.equals( a.getClass() ) )
- .collect( Collectors.toList() );
- }
-
- /**
- * @param annotationMap -
- * @return map of umls annotations to events
- */
- static private Map<IdentifiedAnnotation, Collection<IdentifiedAnnotation>> getAnnotationEvents(
- final Map<TextSpan, Collection<IdentifiedAnnotation>> annotationMap ) {
- final Map<IdentifiedAnnotation, Collection<IdentifiedAnnotation>> annotationEvents = new HashMap<>();
- final Map<TextSpan, Collection<IdentifiedAnnotation>> unusedEvents = new HashMap<>();
- for ( Map.Entry<TextSpan, Collection<IdentifiedAnnotation>> entry : annotationMap.entrySet() ) {
- final Collection<IdentifiedAnnotation> annotations = entry.getValue();
- final Collection<IdentifiedAnnotation> eventMentions = getEventMentions( annotations );
- if ( eventMentions != null && !eventMentions.isEmpty() ) {
- if ( annotations.size() > 1 ) {
- final int pre = annotationEvents.size();
- annotations.stream()
- .filter( EventMention.class::isInstance )
- .filter( a -> !eventMentions.contains( a ) )
- .forEach( a -> annotationEvents.put( a, eventMentions ) );
- if ( annotationEvents.size() > pre ) {
- annotations.removeAll( eventMentions );
- } else {
- unusedEvents.put( entry.getKey(), eventMentions );
- }
- } else {
- unusedEvents.put( entry.getKey(), eventMentions );
- }
- }
- }
- if ( unusedEvents.isEmpty() ) {
- return annotationEvents;
- }
- final Map<TextSpan, Collection<IdentifiedAnnotation>> usedEvents = new HashMap<>();
- for ( Map.Entry<TextSpan, Collection<IdentifiedAnnotation>> entry : annotationMap.entrySet() ) {
- final TextSpan span = entry.getKey();
- TextSpan usedEventSpan = null;
- for ( Map.Entry<TextSpan, Collection<IdentifiedAnnotation>> unusedEvent : unusedEvents.entrySet() ) {
- if ( !span.equals( unusedEvent.getKey() ) && span.contains( unusedEvent.getKey() ) ) {
- entry.getValue()
- .stream()
- .filter( EventMention.class::isInstance )
- .forEach( a -> annotationEvents.put( a, unusedEvent.getValue() ) );
- usedEventSpan = unusedEvent.getKey();
- usedEvents.put( usedEventSpan, unusedEvent.getValue() );
- break;
- }
- }
- if ( usedEventSpan != null ) {
- unusedEvents.remove( usedEventSpan );
- if ( unusedEvents.isEmpty() ) {
- break;
- }
- }
- }
- usedEvents.forEach( ( s, e ) -> annotationMap.get( s )
- .remove( e ) );
- final Collection<TextSpan> emptySpans = annotationMap.entrySet()
- .stream()
- .filter( e -> e.getValue()
- .isEmpty() )
- .map( Map.Entry::getKey )
- .collect( Collectors.toList() );
- annotationMap.keySet()
- .removeAll( emptySpans );
- return annotationEvents;
- }
-
- static private String createSectionName( final Segment section ) {
- final String sectionPref = section.getPreferredText();
- final String sectionId = section.getId();
- if ( sectionId != null && !sectionId.isEmpty() && !sectionId.equals( sectionPref ) ) {
- if ( sectionPref == null || sectionPref.isEmpty() ) {
- return sectionId;
- }
- return sectionPref + " " + sectionId;
- }
- if ( sectionPref != null && !sectionPref.isEmpty() ) {
- return sectionPref;
- }
- final String tagText = section.getTagText();
- if ( tagText == null || tagText.isEmpty() ) {
- return "Unknown Section";
- }
- return tagText;
+ return org.apache.ctakes.core.util.annotation.EssentialAnnotationUtil.getRelationAnnotations( relations );
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/Mapper.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/Mapper.java?rev=1876941&r1=1876940&r2=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/Mapper.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/Mapper.java Fri Apr 24 20:06:57 2020
@@ -26,14 +26,17 @@ import org.apache.ctakes.typesystem.type
* automatically is passed, "unknown relation" id is returned.
*
* @author dmitriy dligach
- *
+ * @deprecated use org.apache.ctakes.core.semantic.SemanticGroup. e.g. SemanticGroup.getBestGroup( annotation ).getCode();
*/
+@Deprecated
public class Mapper {
/**
* Map entity type to its integer id.
+ * @deprecated use org.apache.ctakes.core.semantic.SemanticGroup. SemanticGroup.getGroup( name ).getCode();
*/
- public static int getEntityTypeId(String entityType) {
+ @Deprecated
+ public static int getEntityTypeId( String entityType ) {
if(entityType.equals("Disease_Disorder")) return CONST.NE_TYPE_ID_DISORDER;
else if(entityType.equals("Procedure")) return CONST.NE_TYPE_ID_PROCEDURE;
@@ -45,8 +48,10 @@ public class Mapper {
/**
* Map modifier type to its integer id.
+ * @deprecated use org.apache.ctakes.core.semantic.SemanticGroup. SemanticGroup.getGroup( name ).getCode();
*/
- public static int getModifierTypeId(String modifierType) {
+ @Deprecated
+ public static int getModifierTypeId( String modifierType ) {
if(modifierType.equals("course_class")) return CONST.MODIFIER_TYPE_ID_COURSE_CLASS;
else if(modifierType.equals("severity_class")) return CONST.MODIFIER_TYPE_ID_SEVERITY_CLASS;
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/EssentialAnnotationUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/EssentialAnnotationUtil.java?rev=1876941&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/EssentialAnnotationUtil.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/EssentialAnnotationUtil.java Fri Apr 24 20:06:57 2020
@@ -0,0 +1,455 @@
+package org.apache.ctakes.core.util.annotation;
+
+
+import org.apache.ctakes.core.util.textspan.TextSpan;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.*;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
+import org.apache.uima.fit.util.FSCollectionFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSList;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import java.util.*;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/29/2017
+ */
+final public class EssentialAnnotationUtil {
+
+ // Logger is obtained by the plain name "EssentialAnnotationUtil", not by the class object.
+ static private final Logger LOGGER = Logger.getLogger( "EssentialAnnotationUtil" );
+
+ // Static utility holder; never instantiated.
+ private EssentialAnnotationUtil() {
+ }
+
+ // Part-of-speech tag pattern: "N" followed by one or two arbitrary characters (e.g. NN, NNS, NNP).
+ // NOTE(review): it is applied with matches(), so a four character tag such as NNPS does not match - confirm intent.
+ static private final Pattern N_DOT_PATTERN = Pattern.compile( "N..?" );
+
+ // True for any annotation that is an instance of EventMention, TimeMention or EntityMention.
+ static private final java.util.function.Predicate<Annotation> ESSENTIALS
+ = a -> EventMention.class.isInstance( a )
+ || TimeMention.class.isInstance( a )
+ || EntityMention.class.isInstance( a );
+
+ /**
+  * @param annotations annotations to filter
+  * @return a new list with the event, time and entity mentions of the given collection, in iteration order
+  */
+ static private Collection<IdentifiedAnnotation> getEssentialAnnotations(
+       final Collection<IdentifiedAnnotation> annotations ) {
+    final List<IdentifiedAnnotation> essentials = new ArrayList<>();
+    for ( IdentifiedAnnotation annotation : annotations ) {
+       if ( ESSENTIALS.test( annotation ) ) {
+          essentials.add( annotation );
+       }
+    }
+    return essentials;
+ }
+
+ /**
+  * Strips every non-essential annotation out of each of the given collections, in place.
+  *
+  * @param annotationCollections collections to reduce to their event, time and entity mentions
+  */
+ static private void cullToEssentialAnnotations(
+       final Collection<Collection<IdentifiedAnnotation>> annotationCollections ) {
+    for ( Collection<IdentifiedAnnotation> annotations : annotationCollections ) {
+       final Set<IdentifiedAnnotation> essentials = annotations.stream()
+                                                               .filter( ESSENTIALS )
+                                                               .collect( Collectors.toSet() );
+       annotations.retainAll( essentials );
+    }
+ }
+
+ /**
+  * @param allAnnotations       annotations to filter
+  * @param essentialAnnotations annotations that must be left out of the result
+  * @return a new list of the annotations that are neither in essentialAnnotations nor Markables
+  */
+ static private Collection<IdentifiedAnnotation> getNonEssentialAnnotations(
+       final Collection<IdentifiedAnnotation> allAnnotations,
+       final Collection<IdentifiedAnnotation> essentialAnnotations ) {
+    final List<IdentifiedAnnotation> others = new ArrayList<>();
+    for ( IdentifiedAnnotation annotation : allAnnotations ) {
+       if ( essentialAnnotations.contains( annotation ) || Markable.class.isInstance( annotation ) ) {
+          continue;
+       }
+       others.add( annotation );
+    }
+    return others;
+ }
+
+
+ /**
+  * @param jCas         cas from which all IdentifiedAnnotations and BinaryTextRelations are selected
+  * @param corefIndexed map of coreferent annotations to chain indices
+  * @return essential annotations plus all coreferent annotations and relation arguments
+  */
+ static public Collection<IdentifiedAnnotation> getRequiredAnnotations( final JCas jCas,
+       final Map<IdentifiedAnnotation, Collection<Integer>> corefIndexed ) {
+    final Collection<IdentifiedAnnotation> everyAnnotation = JCasUtil.select( jCas, IdentifiedAnnotation.class );
+    return getRequiredAnnotations( jCas, everyAnnotation, corefIndexed );
+ }
+
+ /**
+  * @param jCas           cas from which all BinaryTextRelations are selected
+  * @param allAnnotations annotations from which the essential ones are taken
+  * @param corefIndexed   map of coreferent annotations to chain indices
+  * @return essential annotations plus all coreferent annotations and relation arguments
+  */
+ static public Collection<IdentifiedAnnotation> getRequiredAnnotations( final JCas jCas,
+       final Collection<IdentifiedAnnotation> allAnnotations,
+       final Map<IdentifiedAnnotation, Collection<Integer>> corefIndexed ) {
+    final Collection<BinaryTextRelation> everyRelation = JCasUtil.select( jCas, BinaryTextRelation.class );
+    return getRequiredAnnotations( allAnnotations, corefIndexed, everyRelation );
+ }
+
+ /**
+  * @param allAnnotations annotations from which the essential ones are taken
+  * @param corefIndexed   map of coreferent annotations to chain indices; its keys are all kept
+  * @param relations      relations whose arguments are all kept
+  * @return set covering all essential annotations, coreferent annotations and relation arguments
+  */
+ static public Collection<IdentifiedAnnotation> getRequiredAnnotations(
+       final Collection<IdentifiedAnnotation> allAnnotations,
+       final Map<IdentifiedAnnotation, Collection<Integer>> corefIndexed,
+       final Collection<BinaryTextRelation> relations ) {
+    // Start from the event, time and entity mentions ...
+    final Collection<IdentifiedAnnotation> required = new HashSet<>( getEssentialAnnotations( allAnnotations ) );
+    // ... then add whatever is needed to express coreferences and relations.
+    required.addAll( corefIndexed.keySet() );
+    required.addAll( getRelationAnnotations( relations ) );
+    return required;
+ }
+
+ /**
+  * Selects every coreference chain in the cas and indexes the annotations that stand in for its markables.
+  *
+  * @param jCas cas holding the CollectionTextRelation coreference chains
+  * @return a map of markable-representing annotations to indexed chain numbers
+  */
+ static public Map<IdentifiedAnnotation, Collection<Integer>> createMarkableCorefs( final JCas jCas ) {
+    final Collection<CollectionTextRelation> chains = JCasUtil.select( jCas, CollectionTextRelation.class );
+    return createMarkableCorefs( chains, mapMarkableAnnotations( jCas, chains ) );
+ }
+
+ /**
+  * Numbers the coreference chains starting at 1, in iteration order, and records for each annotation
+  * that represents a chain member the numbers of the chains it belongs to.
+  *
+  * @param corefs              coreference chains; may be null or empty
+  * @param markableAnnotations map of chain markables to the annotations that represent them
+  * @return a map of annotations to indexed chain numbers; an empty map if there are no chains.
+  * Markables without an entry in markableAnnotations are left out.
+  */
+ static public Map<IdentifiedAnnotation, Collection<Integer>> createMarkableCorefs(
+       final Collection<CollectionTextRelation> corefs,
+       final Map<Markable, IdentifiedAnnotation> markableAnnotations ) {
+    if ( corefs == null || corefs.isEmpty() ) {
+       return Collections.emptyMap();
+    }
+    final Map<IdentifiedAnnotation, Collection<Integer>> corefMarkables = new HashMap<>();
+    int index = 1;
+    for ( CollectionTextRelation coref : corefs ) {
+       final FSList chainHead = coref.getMembers();
+       final Collection<Markable> markables = FSCollectionFactory.create( chainHead, Markable.class );
+       for ( Markable markable : markables ) {
+          final IdentifiedAnnotation annotation = markableAnnotations.get( markable );
+          if ( annotation == null ) {
+             // No annotation was mapped to this markable; skip it instead of creating a null key.
+             continue;
+          }
+          corefMarkables.computeIfAbsent( annotation, a -> new ArrayList<>() )
+                        .add( index );
+       }
+       // The chain number advances even when some or all of its markables were skipped.
+       index++;
+    }
+    return corefMarkables;
+ }
+
+ /**
+  * Splits each coreference chain into sub-chains of annotations that share the same assertion attributes,
+  * drops sub-chains with fewer than two members, orders the remaining sub-chains by the begin offset of
+  * their first member and numbers them starting at 1.
+  *
+  * @param corefs              coreference chains; may be null or empty
+  * @param markableAnnotations map of chain markables to the annotations that represent them
+  * @return a map of annotations to indexed sub-chain numbers; an empty map if there are no chains.
+  * Markables without an entry in markableAnnotations are left out.
+  */
+ static public Map<IdentifiedAnnotation, Collection<Integer>> createMarkableAssertedCorefs(
+       final Collection<CollectionTextRelation> corefs,
+       final Map<Markable, IdentifiedAnnotation> markableAnnotations ) {
+    if ( corefs == null || corefs.isEmpty() ) {
+       return Collections.emptyMap();
+    }
+
+    final List<List<IdentifiedAnnotation>> chains = new ArrayList<>();
+    for ( CollectionTextRelation coref : corefs ) {
+       // Group the members of this chain by their combined assertion attributes.
+       final Map<String, List<IdentifiedAnnotation>> assertionMap = new HashMap<>();
+       final FSList chainHead = coref.getMembers();
+       final Collection<Markable> markables = FSCollectionFactory.create( chainHead, Markable.class );
+       for ( Markable markable : markables ) {
+          final IdentifiedAnnotation annotation = markableAnnotations.get( markable );
+          if ( annotation == null ) {
+             // No annotation was mapped to this markable; its assertion cannot be read.
+             continue;
+          }
+          final String assertion = getAssertion( annotation );
+          assertionMap.computeIfAbsent( assertion, a -> new ArrayList<>() ).add( annotation );
+       }
+       for ( List<IdentifiedAnnotation> asserted : assertionMap.values() ) {
+          // A single annotation is not a chain.
+          if ( asserted.size() > 1 ) {
+             asserted.sort( Comparator.comparingInt( Annotation::getBegin ) );
+             chains.add( asserted );
+          }
+       }
+    }
+    // Order by first member offset. comparingInt avoids the overflow-prone subtraction idiom.
+    chains.sort( Comparator.comparingInt( chain -> chain.get( 0 ).getBegin() ) );
+
+    final Map<IdentifiedAnnotation, Collection<Integer>> corefMarkables = new HashMap<>();
+    int index = 1;
+    for ( Collection<IdentifiedAnnotation> chain : chains ) {
+       for ( IdentifiedAnnotation annotation : chain ) {
+          corefMarkables.computeIfAbsent( annotation, a -> new ArrayList<>() ).add( index );
+       }
+       index++;
+    }
+    return corefMarkables;
+ }
+
+ /**
+  * Builds a key describing the assertion attributes of an annotation. Annotations with equal keys share
+  * polarity, uncertainty, generic and conditional status.
+  *
+  * @param annotation annotation to describe; must not be null
+  * @return "NEGATED" or "AFFIRMED", followed by "UNCERTAIN", "GENERIC" and "CONDITIONAL" where they apply
+  */
+ static private String getAssertion( final IdentifiedAnnotation annotation ) {
+    final StringBuilder sb = new StringBuilder();
+    // NE_POLARITY_NEGATION_PRESENT marks a negated annotation; the two labels used to be swapped.
+    if ( annotation.getPolarity() == CONST.NE_POLARITY_NEGATION_PRESENT ) {
+       sb.append( "NEGATED" );
+    } else {
+       sb.append( "AFFIRMED" );
+    }
+    if ( annotation.getUncertainty() == CONST.NE_UNCERTAINTY_PRESENT ) {
+       sb.append( "UNCERTAIN" );
+    }
+    if ( annotation.getGeneric() ) {
+       sb.append( "GENERIC" );
+    }
+    if ( annotation.getConditional() ) {
+       sb.append( "CONDITIONAL" );
+    }
+    return sb.toString();
+ }
+
+ /**
+  * Chooses, for every markable of every coreference chain, one annotation to represent it.
+  * The choice is made among the annotations indexed as covering the markable's nominal head node:
+  * an essential annotation is preferred, then any other non-Markable annotation, then the markable itself.
+  * Markables with no indexed dependency nodes get no entry in the returned map.
+  *
+  * @param jCas cas used to index dependency nodes and identified annotations
+  * @param corefs coreference chains; may be null or empty
+  * @return map of markable to identified annotation; an empty map if there are no chains
+  */
+ static private Map<Markable, IdentifiedAnnotation> mapMarkableAnnotations(
+ final JCas jCas, final Collection<CollectionTextRelation> corefs ) {
+ if ( corefs == null || corefs.isEmpty() ) {
+ return Collections.emptyMap();
+ }
+ // Index once for the whole cas: dependency nodes per markable, and annotations per dependency node.
+ final Map<Markable, Collection<ConllDependencyNode>> markableNodes
+ = JCasUtil.indexCovered( jCas, Markable.class, ConllDependencyNode.class );
+ final Map<ConllDependencyNode, Collection<IdentifiedAnnotation>> nodeAnnotations
+ = JCasUtil.indexCovering( jCas, ConllDependencyNode.class, IdentifiedAnnotation.class );
+ final Map<Markable, IdentifiedAnnotation> annotationMap = new HashMap<>();
+ for ( CollectionTextRelation coref : corefs ) {
+ final Collection<Markable> markables = JCasUtil.select( coref.getMembers(), Markable.class );
+ for ( Markable markable : markables ) {
+ final Collection<ConllDependencyNode> nodes = markableNodes.get( markable );
+ if ( nodes == null || nodes.isEmpty() ) {
+ // Without dependency nodes there is no head to look up; the markable stays unmapped.
+ continue;
+ }
+ final ConllDependencyNode headNode = getNominalHeadNode( new ArrayList<>( nodes ) );
+ final Collection<IdentifiedAnnotation> headNodeAnnotations = nodeAnnotations.get( headNode );
+ final Collection<IdentifiedAnnotation> essentialAnnotations
+ = getEssentialAnnotations( headNodeAnnotations );
+ final Collection<IdentifiedAnnotation> nonEssentialAnnotations
+ = getNonEssentialAnnotations( headNodeAnnotations,
+ essentialAnnotations );
+
+ IdentifiedAnnotation bestAnnotation = null;
+ int bestLength = Integer.MAX_VALUE;
+ for ( IdentifiedAnnotation annotation : essentialAnnotations ) {
+ // Only the exact class EventMention is excluded here; subclasses of EventMention still qualify.
+ if ( !EventMention.class.equals( annotation.getClass() )
+ && annotation.getBegin() == markable.getBegin()
+ && annotation.getEnd() == markable.getEnd() ) {
+ // Prefer an exact non-event match over the shortest match
+ bestAnnotation = annotation;
+ break;
+ }
+ // Otherwise keep the shortest essential annotation seen so far.
+ if ( annotation.getEnd() - annotation.getBegin() < bestLength ) {
+ bestLength = annotation.getEnd() - annotation.getBegin();
+ bestAnnotation = annotation;
+ }
+ }
+ if ( bestAnnotation != null ) {
+ annotationMap.put( markable, bestAnnotation );
+ continue;
+ }
+ // No essential annotation was found: fall back to the shortest non-essential annotation.
+ for ( IdentifiedAnnotation annotation : nonEssentialAnnotations ) {
+ if ( annotation.getEnd() - annotation.getBegin() < bestLength ) {
+ bestLength = annotation.getEnd() - annotation.getBegin();
+ bestAnnotation = annotation;
+ }
+ }
+ if ( bestAnnotation != null ) {
+ annotationMap.put( markable, bestAnnotation );
+ } else {
+ // Nothing else is available, so the markable represents itself.
+ annotationMap.put( markable, markable );
+ }
+ }
+ }
+ return annotationMap;
+ }
+
+ /**
+  * Finds the head node out of a few ConllDependencyNodes. Biased toward nouns.
+  * Root nodes (id 0) are ignored. A node is a phrase head when some other given node depends on it while
+  * its own head is not among the given nodes. The leftmost noun among the phrase heads is returned, or the
+  * rightmost phrase head if none is a noun. When there is no phrase head at all, the same choice is made
+  * among all non-root nodes.
+  *
+  * @param nodes dependency nodes of a phrase; the list itself is not modified
+  * @return the chosen head node
+  * @throws IndexOutOfBoundsException if nodes is empty or holds only root nodes
+  **/
+ static public ConllDependencyNode getNominalHeadNode( final List<ConllDependencyNode> nodes ) {
+    final List<ConllDependencyNode> candidates = new ArrayList<>( nodes );
+    // Remove root from consideration. removeIf does not skip the element following a removed one,
+    // which an index loop that removes while advancing does.
+    candidates.removeIf( node -> node.getId() == 0 );
+    final int count = candidates.size();
+
+    // Create a dependency matrix: dependsOn[ d ][ h ] is true when candidate h is the head of candidate d.
+    final boolean[][] dependsOn = new boolean[ count ][ count ];
+    for ( int h = 0; h < count; h++ ) {
+       for ( int d = 0; d < count; d++ ) {
+          dependsOn[ d ][ h ]
+                = h != d
+                  && candidates.get( d ).getHead() != null
+                  && candidates.get( h ).getId() == candidates.get( d ).getHead().getId();
+       }
+    }
+
+    // Search the dependency matrix for the phrase heads.
+    final List<ConllDependencyNode> phraseHeads = new ArrayList<>();
+    for ( int i = 0; i < count; i++ ) {
+       boolean hasDependent = false;
+       boolean hasHead = false;
+       for ( int j = 0; j < count; j++ ) {
+          hasDependent |= dependsOn[ j ][ i ];
+          hasHead |= dependsOn[ i ][ j ];
+       }
+       if ( hasDependent && !hasHead ) {
+          phraseHeads.add( candidates.get( i ) );
+       }
+    }
+
+    // Unheaded phrases choose among all candidates, headed phrases among the phrase heads.
+    final List<ConllDependencyNode> pool = phraseHeads.isEmpty() ? candidates : phraseHeads;
+    // pick a noun from the left, if there is one
+    for ( ConllDependencyNode node : pool ) {
+       if ( node != null && node.getPostag() != null
+            && N_DOT_PATTERN.matcher( node.getPostag() ).matches() ) {
+          return node;
+       }
+    }
+    // otherwise, pick the rightmost node
+    return pool.get( pool.size() - 1 );
+ }
+
+
+ /**
+  * Collects the arguments of the given relations. A relation is skipped, and an error is logged, unless
+  * both of its arguments are IdentifiedAnnotations.
+  *
+  * @param relations binary relations
+  * @return set of the source and target annotations of all accepted relations
+  */
+ static public Collection<IdentifiedAnnotation> getRelationAnnotations(
+       final Collection<BinaryTextRelation> relations ) {
+    final Collection<IdentifiedAnnotation> relationAnnotations = new HashSet<>();
+    for ( BinaryTextRelation relation : relations ) {
+       final RelationArgument arg1 = relation.getArg1();
+       final Annotation source = arg1.getArgument();
+       if ( !( source instanceof IdentifiedAnnotation ) ) {
+          LOGGER.error( "Relation source is not an IdentifiedAnnotation " + source.getCoveredText() );
+          continue;
+       }
+       final RelationArgument arg2 = relation.getArg2();
+       final Annotation target = arg2.getArgument();
+       if ( !( target instanceof IdentifiedAnnotation ) ) {
+          // Report the text of the offending target; the source text used to be logged here.
+          LOGGER.error( "Relation target is not an IdentifiedAnnotation " + target.getCoveredText() );
+          continue;
+       }
+       relationAnnotations.add( (IdentifiedAnnotation)source );
+       relationAnnotations.add( (IdentifiedAnnotation)target );
+    }
+    return relationAnnotations;
+ }
+
+
+ /**
+  * The assumption is that any given span can only have one exact EventMention.
+  *
+  * @param annotations annotations to filter
+  * @return a new list of the annotations whose class is exactly EventMention; subclasses are left out
+  */
+ static private Collection<IdentifiedAnnotation> getEventMentions(
+       final Collection<IdentifiedAnnotation> annotations ) {
+    final List<IdentifiedAnnotation> plainEvents = new ArrayList<>();
+    for ( IdentifiedAnnotation annotation : annotations ) {
+       if ( EventMention.class.equals( annotation.getClass() ) ) {
+          plainEvents.add( annotation );
+       }
+    }
+    return plainEvents;
+ }
+
+ /**
+  * Pairs annotations with the plain EventMentions that accompany them.
+  * First pass: within each span, every EventMention subclass instance is paired with the plain EventMentions
+  * of the same span, which are then removed from that span. Plain events that found no partner are kept
+  * aside as unused.
+  * Second pass: an unused event is given to the event annotations of a different span that contains the
+  * span of the unused event. Events given away this way are removed from their own span, and spans left
+  * without annotations are removed from the map.
+  *
+  * @param annotationMap map of text span to the annotations on that span; modified in place
+  * @return map of umls annotations to events
+  */
+ static private Map<IdentifiedAnnotation, Collection<IdentifiedAnnotation>> getAnnotationEvents(
+       final Map<TextSpan, Collection<IdentifiedAnnotation>> annotationMap ) {
+    final Map<IdentifiedAnnotation, Collection<IdentifiedAnnotation>> annotationEvents = new HashMap<>();
+    final Map<TextSpan, Collection<IdentifiedAnnotation>> unusedEvents = new HashMap<>();
+    for ( Map.Entry<TextSpan, Collection<IdentifiedAnnotation>> entry : annotationMap.entrySet() ) {
+       final Collection<IdentifiedAnnotation> annotations = entry.getValue();
+       final Collection<IdentifiedAnnotation> eventMentions = getEventMentions( annotations );
+       if ( eventMentions == null || eventMentions.isEmpty() ) {
+          continue;
+       }
+       if ( annotations.size() <= 1 ) {
+          unusedEvents.put( entry.getKey(), eventMentions );
+          continue;
+       }
+       final int pre = annotationEvents.size();
+       annotations.stream()
+                  .filter( EventMention.class::isInstance )
+                  .filter( a -> !eventMentions.contains( a ) )
+                  .forEach( a -> annotationEvents.put( a, eventMentions ) );
+       if ( annotationEvents.size() > pre ) {
+          // The plain events are now represented by their partners.
+          annotations.removeAll( eventMentions );
+       } else {
+          unusedEvents.put( entry.getKey(), eventMentions );
+       }
+    }
+    if ( unusedEvents.isEmpty() ) {
+       return annotationEvents;
+    }
+    final Map<TextSpan, Collection<IdentifiedAnnotation>> usedEvents = new HashMap<>();
+    for ( Map.Entry<TextSpan, Collection<IdentifiedAnnotation>> entry : annotationMap.entrySet() ) {
+       final TextSpan span = entry.getKey();
+       TextSpan usedEventSpan = null;
+       for ( Map.Entry<TextSpan, Collection<IdentifiedAnnotation>> unusedEvent : unusedEvents.entrySet() ) {
+          if ( !span.equals( unusedEvent.getKey() ) && span.contains( unusedEvent.getKey() ) ) {
+             entry.getValue()
+                  .stream()
+                  .filter( EventMention.class::isInstance )
+                  .forEach( a -> annotationEvents.put( a, unusedEvent.getValue() ) );
+             usedEventSpan = unusedEvent.getKey();
+             usedEvents.put( usedEventSpan, unusedEvent.getValue() );
+             break;
+          }
+       }
+       if ( usedEventSpan != null ) {
+          unusedEvents.remove( usedEventSpan );
+          if ( unusedEvents.isEmpty() ) {
+             break;
+          }
+       }
+    }
+    // removeAll, not remove: the value is a collection of events, and remove( Object ) would look for the
+    // collection itself as an element and so never remove anything.
+    usedEvents.forEach( ( s, e ) -> annotationMap.get( s )
+                                                 .removeAll( e ) );
+    // Drop the spans that no longer hold any annotation.
+    annotationMap.values()
+                 .removeIf( Collection::isEmpty );
+    return annotationEvents;
+ }
+
+ /**
+  * @param section -
+  * @return the preferred text and/or the id of the section when available, otherwise its tag text,
+  * otherwise "Unknown Section"
+  */
+ static private String createSectionName( final Segment section ) {
+    final String preferred = section.getPreferredText();
+    final String id = section.getId();
+    final boolean hasPreferred = preferred != null && !preferred.isEmpty();
+    final boolean hasId = id != null && !id.isEmpty();
+    if ( hasId && !id.equals( preferred ) ) {
+       // The id adds information, so it is shown alone or after the preferred text.
+       return hasPreferred ? preferred + " " + id : id;
+    }
+    if ( hasPreferred ) {
+       return preferred;
+    }
+    final String tag = section.getTagText();
+    return ( tag == null || tag.isEmpty() ) ? "Unknown Section" : tag;
+ }
+
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java?rev=1876941&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java Fri Apr 24 20:06:57 2020
@@ -0,0 +1,99 @@
+package org.apache.ctakes.core.util.annotation;
+
+
+import org.apache.ctakes.core.semantic.SemanticGroup;
+import org.apache.ctakes.core.semantic.SemanticTui;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+
+import java.util.Collection;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * Static convenience accessors for the most commonly requested Identified Annotation properties.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 4/21/2020
+ */
+public final class IdentifiedAnnotationUtil {
+
+   private IdentifiedAnnotationUtil() {
+   }
+
+   /** @return true iff the annotation is not generic, not uncertain, not negated and not conditional. */
+   public static boolean isRealAffirmed( final IdentifiedAnnotation annotation ) {
+      return !( isGeneric( annotation ) || isUncertain( annotation )
+                || isNegated( annotation ) || isConditional( annotation ) );
+   }
+
+   /** @return the generic flag of the annotation. */
+   public static boolean isGeneric( final IdentifiedAnnotation annotation ) {
+      return annotation.getGeneric();
+   }
+
+   /** @return true iff the uncertainty of the annotation equals CONST.NE_UNCERTAINTY_PRESENT. */
+   public static boolean isUncertain( final IdentifiedAnnotation annotation ) {
+      return CONST.NE_UNCERTAINTY_PRESENT == annotation.getUncertainty();
+   }
+
+   /** @return true iff the polarity of the annotation equals CONST.NE_POLARITY_NEGATION_PRESENT. */
+   public static boolean isNegated( final IdentifiedAnnotation annotation ) {
+      return CONST.NE_POLARITY_NEGATION_PRESENT == annotation.getPolarity();
+   }
+
+   /** @return the conditional flag of the annotation. */
+   public static boolean isConditional( final IdentifiedAnnotation annotation ) {
+      return annotation.getConditional();
+   }
+
+   /** @return the covered text of the annotation. */
+   public static String getText( final IdentifiedAnnotation annotation ) {
+      return annotation.getCoveredText();
+   }
+
+   /** @return true iff the historyOf value of the annotation equals CONST.NE_HISTORY_OF_PRESENT. */
+   public static boolean isHistoric( final IdentifiedAnnotation annotation ) {
+      return CONST.NE_HISTORY_OF_PRESENT == annotation.getHistoryOf();
+   }
+
+   /** @return the group that SemanticGroup.getBestGroup selects for the annotation. */
+   public static SemanticGroup getSemanticGroup( final IdentifiedAnnotation annotation ) {
+      return SemanticGroup.getBestGroup( annotation );
+   }
+
+   /** @return the tuis that SemanticTui.getTuis finds for the annotation. */
+   public static Collection<SemanticTui> getSemanticTui( final IdentifiedAnnotation annotation ) {
+      return SemanticTui.getTuis( annotation );
+   }
+
+   /** @return the cuis that OntologyConceptUtil.getCuis finds for the annotation. */
+   public static Collection<String> getCuis( final IdentifiedAnnotation annotation ) {
+      return OntologyConceptUtil.getCuis( annotation );
+   }
+
+   /**
+    * @return a collection of schemes with codes for the given annotation. e.g. snomed_us, rxnorm.
+    */
+   public static Collection<String> getCodeSchemes( final IdentifiedAnnotation annotation ) {
+      return OntologyConceptUtil.getSchemeCodes( annotation ).keySet();
+   }
+
+   /**
+    * @param schemeName the name of a coding scheme. e.g. snomed_us, rxnorm.
+    * @return all annotation codes for the given coding scheme.
+    */
+   public static Collection<String> getCodes( final IdentifiedAnnotation annotation, final String schemeName ) {
+      return OntologyConceptUtil.getCodes( annotation, schemeName );
+   }
+
+   /** @return the distinct preferred texts, neither null nor empty, of the umls concepts of the annotation. */
+   public static Collection<String> getPreferredText( final IdentifiedAnnotation annotation ) {
+      return OntologyConceptUtil.getUmlsConceptStream( annotation )
+                                .map( UmlsConcept::getPreferredText )
+                                .filter( text -> Objects.nonNull( text ) && !text.isEmpty() )
+                                .collect( Collectors.toSet() );
+   }
+}
Copied: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java (from r1876940, ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java?p2=ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java&p1=ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java&r1=1876940&r2=1876941&rev=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java Fri Apr 24 20:06:57 2020
@@ -1,4 +1,4 @@
-package org.apache.ctakes.core.util;
+package org.apache.ctakes.core.util.annotation;
import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
Copied: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/WordTokenUtil.java (from r1876940, ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/WordTokenUtil.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/WordTokenUtil.java?p2=ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/WordTokenUtil.java&p1=ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/WordTokenUtil.java&r1=1876940&r2=1876941&rev=1876941&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/WordTokenUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/WordTokenUtil.java Fri Apr 24 20:06:57 2020
@@ -1,4 +1,4 @@
-package org.apache.ctakes.core.util;
+package org.apache.ctakes.core.util.annotation;
import org.apache.ctakes.typesystem.type.syntax.WordToken;