You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2016/05/17 19:12:04 UTC
svn commit: r1744307 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal:
ae/THYMEAnaforaXMLReader.java eval/Evaluation_ImplBase.java
Author: tmill
Date: Tue May 17 19:12:04 2016
New Revision: 1744307
URL: http://svn.apache.org/viewvc?rev=1744307&view=rev
Log:
Fixes to thyme readers to allow for reading deepphe data.
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java?rev=1744307&r1=1744306&r2=1744307&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java Tue May 17 19:12:04 2016
@@ -127,11 +127,13 @@ public class THYMEAnaforaXMLReader exten
break;
}
}
- if (xmlFile == null) {
+ if (this.anaforaXMLSuffixes.length > 0 && xmlFile == null) {
throw new IllegalArgumentException("no Anafora XML file found from " + possibleXMLFiles);
}
- processXmlFile(jCas, xmlFile);
+ if(xmlFile != null){
+ processXmlFile(jCas, xmlFile);
+ }
if(corefFile.exists()){
processXmlFile(jCas, corefFile);
}
@@ -153,6 +155,7 @@ public class THYMEAnaforaXMLReader exten
int curEventId = 1;
int curTimexId = 1;
int curRelId = 1;
+ int docLen = jCas.getDocumentText().length();
for (Element annotationsElem : dataElem.getChildren("annotations")) {
@@ -181,6 +184,10 @@ public class THYMEAnaforaXMLReader exten
end = spanEnd;
}
}
+ if(begin < 0 || end >= docLen){
+ error("Illegal begin or end boundary", id);
+ continue;
+ }
Annotation annotation;
if (type.equals("EVENT")) {
@@ -248,6 +255,9 @@ public class THYMEAnaforaXMLReader exten
annotation = timeMention;
} else if (type.equals("Markable")) {
+ while(end >= begin && (jCas.getDocumentText().charAt(end-1) == '\n' || jCas.getDocumentText().charAt(end-1) == '\r')){
+ end--;
+ }
Markable markable = new Markable(jCas, begin, end);
markable.addToIndexes();
annotation = markable;
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1744307&r1=1744306&r2=1744307&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Tue May 17 19:12:04 2016
@@ -450,10 +450,21 @@ public abstract class Evaluation_ImplBas
GOLD_VIEW_NAME ) );
switch ( this.xmlFormat ) {
case Anafora:
+ if(this.subcorpus == Subcorpus.DeepPhe){
+ aggregateBuilder.add(
+ AnalysisEngineFactory.createEngineDescription(THYMEAnaforaXMLReader.class,
+ THYMEAnaforaXMLReader.PARAM_ANAFORA_DIRECTORY,
+ this.xmlDirectory,
+ THYMEAnaforaXMLReader.PARAM_ANAFORA_XML_SUFFIXES,
+ new String[]{} ),
+ CAS.NAME_DEFAULT_SOFA,
+ GOLD_VIEW_NAME );
+ }else{
aggregateBuilder.add(
THYMEAnaforaXMLReader.getDescription( this.xmlDirectory ),
CAS.NAME_DEFAULT_SOFA,
GOLD_VIEW_NAME );
+ }
break;
case Knowtator:
aggregateBuilder.add(
@@ -470,9 +481,12 @@ public abstract class Evaluation_ImplBas
}
// identify segments
- aggregateBuilder
- .add( AnalysisEngineFactory.createEngineDescription( SegmentsFromBracketedSectionTagsAnnotator.class ) );
-
+ if(this.subcorpus == Subcorpus.DeepPhe){
+ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(PittHeaderAnnotator.class));
+ }else{
+ aggregateBuilder
+ .add( AnalysisEngineFactory.createEngineDescription( SegmentsFromBracketedSectionTagsAnnotator.class ) );
+ }
// identify sentences
aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
SentenceDetector.class,
@@ -653,6 +667,25 @@ public abstract class Evaluation_ImplBas
}
}
+ /**
+  * Marks the report body that follows the de-identification header line as a single segment.
+  */
+ public static class PittHeaderAnnotator extends JCasAnnotator_ImplBase {
+
+   /** Text that opens the header line; the segment starts on the line after it. */
+   private static final String HEADER_MARKER = "[Report de-identified";
+
+   /**
+    * Adds one {@link Segment} with id "SIMPLE_SEGMENT" to the indexes. The segment starts
+    * right after the newline that ends the header line and runs to the end of the document
+    * text. If the header marker is missing, or no newline follows it, the segment covers
+    * the whole document. Nothing is added when the view has no document text.
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jcas ) throws AnalysisEngineProcessException {
+     final String docText = jcas.getDocumentText();
+     if ( docText == null ) {
+       // No text in this view, so there is nothing to segment.
+       return;
+     }
+     final int markerStart = docText.indexOf( HEADER_MARKER );
+     int bodyStart = 0;
+     if ( markerStart >= 0 ) {
+       // indexOf returns -1 when no newline follows the marker; +1 then falls back to offset 0.
+       bodyStart = docText.indexOf( '\n', markerStart ) + 1;
+     }
+     // Annotation end offsets are exclusive, so the text length (not length - 1) is needed to
+     // include the final character; it also keeps begin <= end for empty text.
+     final Segment mainSegment = new Segment( jcas, bodyStart, docText.length() );
+     mainSegment.setId( "SIMPLE_SEGMENT" );
+     mainSegment.addToIndexes();
+   }
+ }
+
static File getXMIFile( File xmiDirectory, File textFile ) {
String fileName = textFile.getName();
if(!fileName.contains(".xmi")){