You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2016/05/17 19:12:04 UTC

svn commit: r1744307 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/THYMEAnaforaXMLReader.java eval/Evaluation_ImplBase.java

Author: tmill
Date: Tue May 17 19:12:04 2016
New Revision: 1744307

URL: http://svn.apache.org/viewvc?rev=1744307&view=rev
Log:
Fixes to THYME readers to allow for reading DeepPhe data.

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java?rev=1744307&r1=1744306&r2=1744307&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java Tue May 17 19:12:04 2016
@@ -127,11 +127,13 @@ public class THYMEAnaforaXMLReader exten
         break;
       }
     }
-    if (xmlFile == null) {
+    if (this.anaforaXMLSuffixes.length > 0 && xmlFile == null) {
       throw new IllegalArgumentException("no Anafora XML file found from " + possibleXMLFiles);
     }
 
-    processXmlFile(jCas, xmlFile);
+    if(xmlFile != null){
+      processXmlFile(jCas, xmlFile);
+    }
     if(corefFile.exists()){
     	processXmlFile(jCas, corefFile);
     }
@@ -153,6 +155,7 @@ public class THYMEAnaforaXMLReader exten
     int curEventId = 1;
     int curTimexId = 1;
     int curRelId = 1;
+    int docLen = jCas.getDocumentText().length();
     
     for (Element annotationsElem : dataElem.getChildren("annotations")) {
 
@@ -181,6 +184,10 @@ public class THYMEAnaforaXMLReader exten
             end = spanEnd;
           }
         }
+        if(begin < 0 || end >= docLen){
+          error("Illegal begin or end boundary", id);
+          continue;
+        }
 
         Annotation annotation;
         if (type.equals("EVENT")) {
@@ -248,6 +255,9 @@ public class THYMEAnaforaXMLReader exten
           annotation = timeMention;
 
         } else if (type.equals("Markable")) {
+          while(end >= begin && (jCas.getDocumentText().charAt(end-1) == '\n' || jCas.getDocumentText().charAt(end-1) == '\r')){
+            end--;
+          }
           Markable markable = new Markable(jCas, begin, end);
           markable.addToIndexes();
           annotation = markable;

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1744307&r1=1744306&r2=1744307&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Tue May 17 19:12:04 2016
@@ -450,10 +450,21 @@ public abstract class Evaluation_ImplBas
             GOLD_VIEW_NAME ) );
       switch ( this.xmlFormat ) {
          case Anafora:
+           if(this.subcorpus == Subcorpus.DeepPhe){
+            aggregateBuilder.add(
+                  AnalysisEngineFactory.createEngineDescription(THYMEAnaforaXMLReader.class,
+                      THYMEAnaforaXMLReader.PARAM_ANAFORA_DIRECTORY,
+                      this.xmlDirectory,
+                      THYMEAnaforaXMLReader.PARAM_ANAFORA_XML_SUFFIXES,
+                      new String[]{} ),
+                  CAS.NAME_DEFAULT_SOFA,
+                  GOLD_VIEW_NAME );
+           }else{
             aggregateBuilder.add(
                   THYMEAnaforaXMLReader.getDescription( this.xmlDirectory ),
                   CAS.NAME_DEFAULT_SOFA,
                   GOLD_VIEW_NAME );
+           }
             break;
          case Knowtator:
             aggregateBuilder.add(
@@ -470,9 +481,12 @@ public abstract class Evaluation_ImplBas
       }
 
       // identify segments
-      aggregateBuilder
-            .add( AnalysisEngineFactory.createEngineDescription( SegmentsFromBracketedSectionTagsAnnotator.class ) );
-
+      if(this.subcorpus == Subcorpus.DeepPhe){
+        aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(PittHeaderAnnotator.class));
+      }else{
+        aggregateBuilder
+        .add( AnalysisEngineFactory.createEngineDescription( SegmentsFromBracketedSectionTagsAnnotator.class ) );
+      }
       // identify sentences
       aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
             SentenceDetector.class,
@@ -653,6 +667,25 @@ public abstract class Evaluation_ImplBas
       }
    }
 
+   /**
+    * Adds a single Segment covering the document text that follows the "[Report de-identified" header line
+    */
+   public static class PittHeaderAnnotator extends JCasAnnotator_ImplBase {
+
+     /**
+      * Creates a Segment with id "SIMPLE_SEGMENT" from just after the header line's newline to docText.length()-1
+      * {@inheritDoc}
+      */
+     @Override
+     public void process( final JCas jcas ) throws AnalysisEngineProcessException {
+       String docText = jcas.getDocumentText();
+       int headerEnd = docText.indexOf("\n", docText.indexOf("[Report de-identified"));
+       Segment mainSegment = new Segment(jcas, headerEnd+1, docText.length()-1);
+       mainSegment.setId("SIMPLE_SEGMENT");
+       mainSegment.addToIndexes();
+     }
+   }
+
    static File getXMIFile( File xmiDirectory, File textFile ) {
 	   String fileName = textFile.getName();
 	   if(!fileName.contains(".xmi")){