You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/10/06 23:40:52 UTC

svn commit: r1707152 - in /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis: MetastasisAnaforaXMLReader.java MetastasisXmiGenerationPipeline.java

Author: dligach
Date: Tue Oct  6 21:40:52 2015
New Revision: 1707152

URL: http://svn.apache.org/viewvc?rev=1707152&view=rev
Log:
Automatically choose between the two possible Anafora XML file name suffixes (".UmlsDeepPhe.dave.completed.xml" or ".UmlsDeepPhe.dave.inprogress.xml"), depending on which file actually exists.

Modified:
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java?rev=1707152&r1=1707151&r2=1707152&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java Tue Oct  6 21:40:52 2015
@@ -49,35 +49,29 @@ public class MetastasisAnaforaXMLReader
   
   private static Logger LOGGER = Logger.getLogger(MetastasisAnaforaXMLReader.class);
 
-  public static final String PARAM_ANAFORA_DIRECTORY = "anaforaDirectory";
-
-  @ConfigurationParameter(
-      name = PARAM_ANAFORA_DIRECTORY,
-      description = "root directory of the Anafora-annotated files, with one subdirectory for "
-          + "each annotated file")
-  private File anaforaDirectory;
-
   public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
     return AnalysisEngineFactory.createEngineDescription(MetastasisAnaforaXMLReader.class);
   }
 
-  public static AnalysisEngineDescription getDescription(File anaforaDirectory)
-      throws ResourceInitializationException {
-    return AnalysisEngineFactory.createEngineDescription(
-        MetastasisAnaforaXMLReader.class,
-        MetastasisAnaforaXMLReader.PARAM_ANAFORA_DIRECTORY,
-        anaforaDirectory);
-  }
-
   @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
     
-    // determine source text file
+    // locate the Anafora xml file, whose suffix is either
+    // 'UmlsDeepPhe.dave.completed.xml' or 'UmlsDeepPhe.dave.inprogress.xml'
     String textFileName = ViewUriUtil.getURI(jCas).getPath();
-    String xmlFileName = textFileName + ".UmlsDeepPhe.dave.inprogress.xml";
-    LOGGER.info("processing xml file: " + xmlFileName);
-
-    processXmlFile(jCas, new File(xmlFileName));
+    String xmlFileNameAlternative1 = textFileName + ".UmlsDeepPhe.dave.completed.xml";
+    String xmlFileNameAlternative2 = textFileName + ".UmlsDeepPhe.dave.inprogress.xml";
+    File xmlFileAlternative1 = new File(xmlFileNameAlternative1);
+    File xmlFileAlternative2 = new File(xmlFileNameAlternative2);
+    if(xmlFileAlternative1.exists()) {
+      LOGGER.info("processing xml file: " + xmlFileNameAlternative1);
+      processXmlFile(jCas, xmlFileAlternative1);
+    } else if(xmlFileAlternative2.exists()){
+      LOGGER.info("processing xml file: " + xmlFileNameAlternative2);
+      processXmlFile(jCas, xmlFileAlternative2);      
+    } else {
+      throw new IllegalArgumentException("no Anafora XML file found for " + textFileName);
+    }
   }
   
   private static void processXmlFile(JCas jCas, File xmlFile) throws AnalysisEngineProcessException{

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java?rev=1707152&r1=1707151&r2=1707152&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java Tue Oct  6 21:40:52 2015
@@ -39,14 +39,19 @@ import com.google.common.io.CharStreams;
 
 public class MetastasisXmiGenerationPipeline {
 
-  public static final File INPUT_DIR = new File("/Users/dima/Boston/Data/DeepPhe/Metastasis/patient93_report028_NOTE/");
-  public static final String OUTPUT_DIR = "/Users/Dima/Boston/Out/";
+  public static final File ANAFORA_ANNOTATIONS_DIR = new File("/Users/dima/Boston/Data/DeepPhe/Metastasis/");
+  public static final String XMI_OUTPUT_DIR = "/Users/Dima/Boston/Out/";
   public static final String GOLD_VIEW_NAME = "GoldView";
 
   public static void main(String[] args) throws Exception {
-
+    
     List<File> files = new ArrayList<>();
-    files.add(new File("/Users/dima/Boston/Data/DeepPhe/Metastasis/patient93_report028_NOTE/patient93_report028_NOTE"));
+    // notes have the same names as the directories in which they exist
+    for(File anaforaNoteDir : ANAFORA_ANNOTATIONS_DIR.listFiles()) {
+      String noteFileName = anaforaNoteDir.getName(); 
+      String noteFullPath = anaforaNoteDir.getAbsolutePath() + "/" + noteFileName;
+      files.add(new File(noteFullPath));
+    }
     
     CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files);
     AnalysisEngine engine = getXMIWritingPreprocessorAggregateBuilder().createAggregate();
@@ -75,16 +80,13 @@ public class MetastasisXmiGenerationPipe
         CAS.NAME_DEFAULT_SOFA,
         GOLD_VIEW_NAME);
     
-    builder.add(
-        MetastasisAnaforaXMLReader.getDescription(INPUT_DIR),
-        CAS.NAME_DEFAULT_SOFA,
-        GOLD_VIEW_NAME);
+    builder.add(MetastasisAnaforaXMLReader.getDescription());
 
     // write out the CAS after all the above annotations
     builder.add(AnalysisEngineFactory.createEngineDescription(
         XMIWriter.class,
         XMIWriter.PARAM_XMI_DIRECTORY,
-        OUTPUT_DIR));
+        XMI_OUTPUT_DIR));
 
     return builder;
   }