You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/10/07 00:35:16 UTC
svn commit: r1707162 - in
/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis:
MetastasisAnaforaXMLReader.java MetastasisXmiGenerationPipeline.java
Author: dligach
Date: Tue Oct 6 22:35:15 2015
New Revision: 1707162
URL: http://svn.apache.org/viewvc?rev=1707162&view=rev
Log:
Fixed a bug where disease/disorder mentions with no linked body sites were still added to the CAS as location_of relations.
Modified:
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java?rev=1707162&r1=1707161&r2=1707162&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisAnaforaXMLReader.java Tue Oct 6 22:35:15 2015
@@ -33,7 +33,6 @@ import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
@@ -89,15 +88,17 @@ public class MetastasisAnaforaXMLReader
for (Element annotationsElem : dataElem.getChildren("annotations")) {
+ // map annotation id to annotation object
Map<String, IdentifiedAnnotation> idToAnnotation = Maps.newHashMap();
+ // map disease/disorder id to body site ids
Map<String, List<String>> diseaseDisorderToAnatomicalSites = Maps.newHashMap();
for (Element entityElem : annotationsElem.getChildren("entity")) {
- String id = removeSingleChildText(entityElem, "id", null);
- Element spanElem = removeSingleChild(entityElem, "span", id);
- String type = removeSingleChildText(entityElem, "type", id);
- Element propertiesElem = removeSingleChild(entityElem, "properties", id);
+ String annotationId = removeSingleChildText(entityElem, "id", null);
+ Element spanElem = removeSingleChild(entityElem, "span", annotationId);
+ String type = removeSingleChildText(entityElem, "type", annotationId);
+ Element propertiesElem = removeSingleChild(entityElem, "properties", annotationId);
// UIMA doesn't support disjoint spans, so take the span enclosing everything
int begin = Integer.MAX_VALUE;
@@ -105,7 +106,7 @@ public class MetastasisAnaforaXMLReader
for(String spanString : spanElem.getText().split(";")) {
String[] beginEndStrings = spanString.split(",");
if (beginEndStrings.length != 2) {
- error("span not of the format 'number,number'", id);
+ error("span not of the format 'number,number'", annotationId);
}
int spanBegin = Integer.parseInt(beginEndStrings[0]);
int spanEnd = Integer.parseInt(beginEndStrings[1]);
@@ -117,25 +118,30 @@ public class MetastasisAnaforaXMLReader
}
}
+ // disease/disorder and metastasis are both represented as disease disorder mentions
if(type.equals("Disease_Disorder") || type.equals("Metastasis")) {
DiseaseDisorderMention diseaseDisorderMention = new DiseaseDisorderMention(jCas, begin, end);
diseaseDisorderMention.addToIndexes();
- idToAnnotation.put(id, diseaseDisorderMention);
+ idToAnnotation.put(annotationId, diseaseDisorderMention);
List<String> anatomicalSiteIds = Lists.newArrayList();
for(Element child : propertiesElem.getChildren("body_location")) {
String bodyLocationId = child.getText();
- anatomicalSiteIds.add(bodyLocationId);
- }
- diseaseDisorderToAnatomicalSites.put(id, anatomicalSiteIds);
+ if(! bodyLocationId.equals("")) {
+ anatomicalSiteIds.add(bodyLocationId);
+ }
+ }
+ diseaseDisorderToAnatomicalSites.put(annotationId, anatomicalSiteIds);
} else if(type.equals("Anatomical_site")) {
AnatomicalSiteMention anatomicalSiteMention = new AnatomicalSiteMention(jCas, begin, end);
anatomicalSiteMention.addToIndexes();
- idToAnnotation.put(id, anatomicalSiteMention);
+ idToAnnotation.put(annotationId, anatomicalSiteMention);
} else {
continue; // not going to worry about other types for the moment
}
}
+ // now create a location_of relation in the cas
+ // whenever we see a body site linked to a disease/disorder or a metastasis
for(String diseaseDisorderId : diseaseDisorderToAnatomicalSites.keySet()) {
IdentifiedAnnotation diseaseDisorderMention = idToAnnotation.get(diseaseDisorderId);
for(String anatomicalSiteId : diseaseDisorderToAnatomicalSites.get(diseaseDisorderId)) {
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java?rev=1707162&r1=1707161&r2=1707162&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java Tue Oct 6 22:35:15 2015
@@ -13,6 +13,7 @@ import org.apache.ctakes.relationextract
import org.apache.ctakes.relationextractor.eval.SHARPXMI.DocumentIDAnnotator;
import org.apache.ctakes.typesystem.type.syntax.Chunk;
import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
+import org.apache.uima.UIMAFramework;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -28,6 +29,8 @@ import org.apache.uima.fit.pipeline.Simp
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLParser;
import org.apache.uima.util.XMLSerializer;
import org.cleartk.util.ViewUriUtil;
import org.cleartk.util.ae.UriToDocumentTextAnnotator;
@@ -63,10 +66,10 @@ public class MetastasisXmiGenerationPipe
AggregateBuilder builder = new AggregateBuilder();
builder.add(UriToDocumentTextAnnotator.getDescription());
-// File preprocessDescFile = new File("desc/analysis_engine/RelationExtractorPreprocessor.xml");
-// XMLParser parser = UIMAFramework.getXMLParser();
-// XMLInputSource source = new XMLInputSource(preprocessDescFile);
-// builder.add(parser.parseAnalysisEngineDescription(source));
+ File preprocessDescFile = new File("desc/analysis_engine/RelationExtractorPreprocessor.xml");
+ XMLParser parser = UIMAFramework.getXMLParser();
+ XMLInputSource source = new XMLInputSource(preprocessDescFile);
+ builder.add(parser.parseAnalysisEngineDescription(source));
builder.add(AnalysisEngineFactory.createEngineDescription(
ViewCreatorAnnotator.class,
@@ -80,7 +83,10 @@ public class MetastasisXmiGenerationPipe
CAS.NAME_DEFAULT_SOFA,
GOLD_VIEW_NAME);
- builder.add(MetastasisAnaforaXMLReader.getDescription());
+ builder.add(
+ MetastasisAnaforaXMLReader.getDescription(),
+ CAS.NAME_DEFAULT_SOFA,
+ GOLD_VIEW_NAME); // this tells it to create all annotations in the gold view!
// write out the CAS after all the above annotations
builder.add(AnalysisEngineFactory.createEngineDescription(