You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2013/08/14 23:07:03 UTC

svn commit: r1514050 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java

Author: james-masanz
Date: Wed Aug 14 21:07:02 2013
New Revision: 1514050

URL: http://svn.apache.org/r1514050
Log:
Clean up the file somewhat.

Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java?rev=1514050&r1=1514049&r2=1514050&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java Wed Aug 14 21:07:02 2013
@@ -19,112 +19,26 @@
 package org.apache.ctakes.assertion.cr;
 
 import java.io.BufferedReader;
-import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 import org.apache.ctakes.assertion.util.AssertionConst;
-import org.apache.ctakes.core.knowtator.KnowtatorAnnotation;
-import org.apache.ctakes.core.knowtator.KnowtatorXMLParser;
 import org.apache.ctakes.core.util.CtakesFileNamer;
-import org.apache.ctakes.core.util.SHARPKnowtatorXMLDefaults;
 import org.apache.ctakes.typesystem.type.constants.CONST;
-import org.apache.ctakes.typesystem.type.refsem.BodyLaterality;
-import org.apache.ctakes.typesystem.type.refsem.BodySide;
-import org.apache.ctakes.typesystem.type.refsem.Course;
-import org.apache.ctakes.typesystem.type.refsem.Date;
-import org.apache.ctakes.typesystem.type.refsem.Event;
-import org.apache.ctakes.typesystem.type.refsem.EventProperties;
-import org.apache.ctakes.typesystem.type.refsem.LabReferenceRange;
-import org.apache.ctakes.typesystem.type.refsem.LabValue;
-import org.apache.ctakes.typesystem.type.refsem.MedicationDosage;
-import org.apache.ctakes.typesystem.type.refsem.MedicationDuration;
-import org.apache.ctakes.typesystem.type.refsem.MedicationForm;
-import org.apache.ctakes.typesystem.type.refsem.MedicationFrequency;
-import org.apache.ctakes.typesystem.type.refsem.MedicationRoute;
-import org.apache.ctakes.typesystem.type.refsem.MedicationStatusChange;
-import org.apache.ctakes.typesystem.type.refsem.MedicationStrength;
-import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
-import org.apache.ctakes.typesystem.type.refsem.ProcedureDevice;
-import org.apache.ctakes.typesystem.type.refsem.ProcedureMethod;
-import org.apache.ctakes.typesystem.type.refsem.Severity;
-import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
-import org.apache.ctakes.typesystem.type.relation.AffectsTextRelation;
-import org.apache.ctakes.typesystem.type.relation.AspectualTextRelation;
-import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
-import org.apache.ctakes.typesystem.type.relation.ComplicatesDisruptsTextRelation;
-import org.apache.ctakes.typesystem.type.relation.DegreeOfTextRelation;
-import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
-import org.apache.ctakes.typesystem.type.relation.ManagesTreatsTextRelation;
-import org.apache.ctakes.typesystem.type.relation.ManifestationOfTextRelation;
-import org.apache.ctakes.typesystem.type.relation.RelationArgument;
-import org.apache.ctakes.typesystem.type.relation.ResultOfTextRelation;
-import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
 import org.apache.ctakes.typesystem.type.structured.DocumentID;
-import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
-import org.apache.ctakes.typesystem.type.textsem.BodyLateralityModifier;
-import org.apache.ctakes.typesystem.type.textsem.BodySideModifier;
-import org.apache.ctakes.typesystem.type.textsem.ConditionalModifier;
-import org.apache.ctakes.typesystem.type.textsem.CourseModifier;
-import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.EventMention;
-import org.apache.ctakes.typesystem.type.textsem.GenericModifier;
-import org.apache.ctakes.typesystem.type.textsem.HistoryOfModifier;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.LabEstimatedModifier;
-import org.apache.ctakes.typesystem.type.textsem.LabInterpretationModifier;
-import org.apache.ctakes.typesystem.type.textsem.LabMention;
-import org.apache.ctakes.typesystem.type.textsem.LabReferenceRangeModifier;
-import org.apache.ctakes.typesystem.type.textsem.LabValueModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationAllergyModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationDosageModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationDurationModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationFormModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationFrequencyModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationMention;
-import org.apache.ctakes.typesystem.type.textsem.MedicationRouteModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationStatusChangeModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationStrengthModifier;
-import org.apache.ctakes.typesystem.type.textsem.Modifier;
-import org.apache.ctakes.typesystem.type.textsem.PolarityModifier;
-import org.apache.ctakes.typesystem.type.textsem.ProcedureDeviceModifier;
-import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
-import org.apache.ctakes.typesystem.type.textsem.ProcedureMethodModifier;
-import org.apache.ctakes.typesystem.type.textsem.SeverityModifier;
-import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
-import org.apache.ctakes.typesystem.type.textsem.SubjectModifier;
-import org.apache.ctakes.typesystem.type.textsem.TimeMention;
-import org.apache.ctakes.typesystem.type.textsem.UncertaintyModifier;
 import org.apache.log4j.Logger;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.Feature;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.FSArray;
-import org.apache.uima.jcas.cas.TOP;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.jdom2.JDOMException;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.component.xwriter.XWriter;
-import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AnalysisEngineFactory;
-import org.uimafit.util.JCasUtil;
 
-import com.google.common.base.Charsets;
 import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import com.google.common.io.Files;
 
 /**
  * assumes knowtator xml files are in "exported-xml" subdirectory
@@ -134,18 +48,6 @@ import com.google.common.io.Files;
 public class NegExCorpusReader extends JCasAnnotator_ImplBase {
   static Logger LOGGER = Logger.getLogger(NegExCorpusReader.class);
   
-  public static final String PARAM_TEXT_DIRECTORY = "TextDirectory";
-  @ConfigurationParameter(
-      name = PARAM_TEXT_DIRECTORY,
-      description = "directory containing the text files (if DocumentIDs are just filenames); "
-          + "defaults to assuming that DocumentIDs are full file paths")
-  private File textDirectory;
-  
-  public static final String PARAM_SET_DEFAULTS = "SetDefaults";
-  @ConfigurationParameter(
-      name = PARAM_SET_DEFAULTS,
-      description = "whether or not to set default attribute values if no annotation is present")
-  private boolean setDefaults;
 
   private static final Map<String, String> SUBJECT_KNOWTATOR_TO_UIMA_MAP;
   static {
@@ -158,93 +60,12 @@ public class NegExCorpusReader extends J
     SUBJECT_KNOWTATOR_TO_UIMA_MAP.put("other", CONST.ATTR_SUBJECT_OTHER);
   }
   
-  /**
-   * Get the URI that the text in this class was loaded from
-   */
-  protected URI getTextURI(JCas jCas) throws AnalysisEngineProcessException {
-
-	  String textPath = JCasUtil.selectSingle(jCas, DocumentID.class).getDocumentID();
-	  if (this.textDirectory != null) {
-		  textPath = this.textDirectory + File.separator +  textPath;
-	  }
-
-	  URI uri;
-	  try {
-		  uri = new URI(textPath);
-	  } catch (URISyntaxException e) {
-		  throw new AnalysisEngineProcessException(e);
-	  }
-
-	  //LOGGER.info("textPath = " + textPath);
-	  //LOGGER.info("uri = " + uri);
-	  
-	  
-	  
-	  
-	  //File tmpFile = new File(textPath); // Note this does not work with something like "file:/C:/usr/data/MiPACQ/1/xml/0054074073-0.xml"
-	  //LOGGER.info("tmpFile = " + tmpFile);
-	  //URI answer = tmpFile.toURI();
-	  //LOGGER.info("answer = " + answer);
-
-	  return uri;
-
-  }
-  
-
-
-/**
-   * Returns the names of the annotators in the Knowtator files that represent the gold standard
-   */
-  protected static String[] getAnnotatorNames() {
-    return new String[] { "cTAKES , Mayo Clinic", "CU annotator ,", "consensus set annotator team" , "cons annotator team", "cons team", "team" }; // these three are what are used by MiPACQ gold standard
-  }
-  
-
-  private static List<String> getDiseaseDisorderKnowtatorClasses() {
-	  return Arrays.asList(new String [] {"Disorders"}); 
-  }
-  
-  
-  private static List<String> getSignSymptomKnowtatorClasses() {
-	  return Arrays.asList(new String [] {"Sign_Symptom", "Finding"}); 
-  }
-  
-  private static List<String> getProcedureKnowtatorClasses() {
-	  return Arrays.asList(new String [] {
-			  "Diagnostic_procedure",
-			  "Laboratory_procedure",
-			  "Procedures",
-			  "Therapeutic_or_preventive_procedure",
-			  "Intervention",
-			  "Health_care_activity",
-			  "Research_activity"}); 
-  }
-  
-  private static List<String> getMedicationKnowtatorClasses() {
-	  return Arrays.asList(new String [] {"Chemicals_and_drugs", "Pharmacologic_substance"}); 
-  }
-  
-  private static List<String> getAnatomyKnowtatorClasses() {
-	  return Arrays.asList(new String [] {"Anatomy"}); 
-  }
-
 
   @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
 	  //
   }
   
-  static String format(Annotation ann) {
-    String result;
-    if (ann.getEnd() == Integer.MIN_VALUE || ann.getBegin() == Integer.MAX_VALUE) {
-      result = "<no-spanned-text>";
-    } else {
-      result = String.format("\"%s\"[%d,%d]", ann.getCoveredText(), ann.getBegin(), ann.getEnd());
-    }
-    return String.format("%s(%s)", ann.getClass().getSimpleName(), result);
-  }
-  
-