You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ma...@apache.org on 2012/11/20 18:44:33 UTC
svn commit: r1411758 [1/2] - in /incubator/ctakes/trunk/ctakes-assertion:
.settings/ resources/launch/ src/main/java/org/apache/ctakes/assertion/cr/
src/main/java/org/apache/ctakes/assertion/eval/
src/main/java/org/apache/ctakes/assertion/medfacts/clea...
Author: mattcoarr
Date: Tue Nov 20 17:44:30 2012
New Revision: 1411758
URL: http://svn.apache.org/viewvc?rev=1411758&view=rev
Log:
More work on refactoring the assertion module to use ClearTK and uimaFIT. This includes separate analysis engines for each attribute.
Added:
incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs
incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch
incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/CtakesFileNamer.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipeline.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/relationextractor/
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/relationextractor/cr/
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/relationextractor/cr/Mapper.java
Modified:
incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
Modified: incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs?rev=1411758&r1=1411757&r2=1411758&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs (original)
+++ incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs Tue Nov 20 17:44:30 2012
@@ -1,5 +1,4 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
-encoding//target/generated-sources/jcasgen=UTF-8
encoding/<project>=UTF-8
Added: incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs (added)
+++ incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs Tue Nov 20 17:44:30 2012
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
Added: incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch (added)
+++ incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch Tue Nov 20 17:44:30 2012
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.assertion.pipelines.GoldEntityAndAttributeReaderPipelineForSeedCorpus"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/Mayo/UMLS_CEM"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-assertion"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+</launchConfiguration>
Added: incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch (added)
+++ incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch Tue Nov 20 17:44:30 2012
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<stringAttribute key="bad_container_name" value="/ctakes-assertion/resour"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<stringAttribute key="org.eclipse.debug.ui.ATTR_CAPTURE_IN_FILE" value="/tmp/assertion.log"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.assertion.eval.AssertionEvalBasedOnModifier"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="--train-dir /sharp_data/train --test-dir /sharp_data/test --models-dir /sharp_data/model/eval.model"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-assertion"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+</launchConfiguration>
Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.cr;
+
+/**
+ * Information about a relation argument.
+ *
+ * It typically looks something like this in a knowtator xml file:
+ *
+ * <complexSlotMention id="Relations_Sept21_Schema_Set02_Instance_90018">
+ * <mentionSlot id="Related_to" />
+ * <complexSlotMentionValue value="Relations_Sept21_Schema_Instance_30350" />
+ * </complexSlotMention>
+ *
+ * This xml is parsed and stored in this class.
+ *
+ * @author dmitriy dligach
+ *
+ */
+public class ArgumentInfo {
+
+  /** Content of the "value" attribute of the complexSlotMentionValue element. */
+  public String value;
+
+  /** Slot name of the argument, e.g. "Related_to". */
+  public String role;
+
+  /**
+   * Stores the parsed value/role pair of one relation argument.
+   *
+   * @param value content of the "value" attribute
+   * @param role  slot name, e.g. "Related_to"
+   */
+  ArgumentInfo(String value, String role) {
+    this.role = role;
+    this.value = value;
+  }
+}
Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.assertion.cr;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.ctakes.relationextractor.cr.Mapper;
+import org.jdom.Document;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+
+/**
+ * Read named entity annotations from knowtator xml files into the CAS
+ *
+ * @author stephen wu
+ *
+ */
+public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
+
+ // Name of the configuration parameter that should contain the path to the knowtator xml files
+ public static final String PARAM_INPUTDIR = "InputDirectory";
+ // Path to the knowtator xml files; assigned in initialize() from PARAM_INPUTDIR.
+ // NOTE(review): this field is static but written by every instance's initialize(),
+ // so all instances share (and overwrite) the same value -- confirm this is intended.
+ public static String inputDirectory;
+ // Counter for assigning entity ids; reset to 0 in initialize() and incremented once per mention in process()
+ public int identifiedAnnotationId;
+ // Verbosity flag; it is not read by any active code in this class
+ private boolean VERBOSE = true;
+
+ @Override
+ public void initialize(UimaContext aContext) throws ResourceInitializationException {
+ super.initialize(aContext);
+
+ inputDirectory = (String)aContext.getConfigParameterValue(PARAM_INPUTDIR);
+ identifiedAnnotationId = 0;
+ }
+
+ /**
+  * Reads the gold-standard knowtator xml file of the current document and adds one
+  * annotation per named-entity mention, together with its attributes, to the
+  * default view of the CAS.
+  *
+  * The gold file is looked up as "DOCUMENT_ID.knowtator.xml" inside
+  * {@link #inputDirectory}.
+  *
+  * @param jCas the CAS of the document being processed
+  * @throws AnalysisEngineProcessException if the default view cannot be obtained or
+  *         the knowtator file cannot be read or parsed
+  */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+
+   JCas initView;
+   try {
+     initView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+   } catch (CASException e) {
+     throw new AnalysisEngineProcessException(e);
+   }
+
+   // Resolve the file name against the directory instead of concatenating strings,
+   // so the path is correct whether or not the directory ends with a separator.
+   String goldFileName = DocumentIDAnnotationUtil.getDocumentID(jCas) + ".knowtator.xml";
+   File goldFile = new File(inputDirectory, goldFileName);
+
+   Document document;
+   try {
+     document = new SAXBuilder().build(goldFile);
+   } catch (Exception e) {
+     // TODO this should be JDOMException | IOException, but the command-line maven build was breaking
+     throw new AnalysisEngineProcessException(e);
+   }
+
+   // map knowtator mention ids to entity offsets
+   HashMap<String, ArrayList<Span>> allMentions = XMLReader.getEntityMentions(document);
+   // map knowtator mention ids to entity types
+   HashMap<String, String> entityTypes = XMLReader.getEntityTypes(document);
+   // map knowtator mention ids to the ids of mention-attributes (or attributes themselves)
+   HashMap<String, List<String>> mentionAttr = XMLReader.getEntityAttributes(document);
+   // map knowtator mention-attribute ids to attributes
+   String[] complexSlotMention = {"complexSlotMention"};
+   HashMap<String, ArgumentInfo> attrPtr = XMLReader.getAttributes(document, complexSlotMention);
+   // map knowtator attribute ids to role-value pairs
+   HashMap<String, ArgumentInfo> attrs = XMLReader.getAttributes(document);
+
+   // pare down hashmap based on types -- keep only NEs
+   HashMap<String, ArrayList<Span>> neMentions = filterToNamedEntitiesOnly(allMentions, entityTypes);
+
+   for (Map.Entry<String, ArrayList<Span>> mention : neMentions.entrySet()) {
+     String mentionId = mention.getKey();
+     ArrayList<Span> spans = mention.getValue();
+
+     // for disjoint spans, just ignore the gap
+     Span first = spans.get(0);
+     Span last = spans.get(spans.size() - 1);
+
+     // look the type id up once; it selects the annotation class and is stored on it
+     int type = Mapper.getEntityTypeId(entityTypes.get(mentionId));
+
+     // put entity and attributes into the CAS: choose either entity or event
+     IdentifiedAnnotation eMention;
+     if (type == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
+       eMention = new EntityMention(initView, first.start, last.end);
+     } else if (type == CONST.NE_TYPE_ID_DISORDER
+         || type == CONST.NE_TYPE_ID_DRUG
+         || type == CONST.NE_TYPE_ID_FINDING
+         || type == CONST.NE_TYPE_ID_PROCEDURE) {
+       eMention = new EventMention(initView, first.start, last.end);
+     } else {
+       eMention = new IdentifiedAnnotation(initView, first.start, last.end);
+     }
+
+     // set easy attributes
+     eMention.setTypeID(type);
+     eMention.setId(identifiedAnnotationId++);
+     eMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
+     eMention.setConfidence(1);
+
+     // set harder attributes: follow the attribute pointers down to the role/value leaves
+     List<ArgumentInfo> assocAttributes = getLeafAttributes(mentionId,
+         mentionAttr, attrPtr, attrs, new ArrayList<ArgumentInfo>());
+
+     for (ArgumentInfo a : assocAttributes) {
+       // look up the attribute role and set values accordingly
+       checkForAttrValue(eMention, a.role, a.value);
+     }
+
+     // add to CAS
+     eMention.addToIndexes();
+   }
+ }
+
+ /**
+  * Collects the leaf attributes (role/value pairs) reachable from the given id.
+  *
+  * If the id is a key of mentionAttr, every attribute id attached to that mention
+  * is visited; otherwise the id itself is treated as an attribute id. Relation
+  * arguments (see {@link #isRelationArgument(String)}) are never followed or collected.
+  *
+  * @param id          knowtator mention id or attribute id to start from
+  * @param mentionAttr mention id to the ids of its attributes
+  * @param attrPtr     attribute id to a pointer whose value is another id
+  * @param attrs       attribute id to its role/value pair
+  * @param output      list the collected attributes are appended to
+  * @return the same list that was passed in as output
+  */
+ private List<ArgumentInfo> getLeafAttributes(String id,
+     HashMap<String, List<String>> mentionAttr,
+     HashMap<String, ArgumentInfo> attrPtr, HashMap<String, ArgumentInfo> attrs, List<ArgumentInfo> output) {
+
+   if (mentionAttr.containsKey(id)) {
+     // id names a mention: visit every attribute id attached to it
+     for (String attrId : mentionAttr.get(id)) {
+       visitAttribute(attrId, mentionAttr, attrPtr, attrs, output);
+     }
+   } else {
+     // otherwise treat id itself as an attribute id
+     visitAttribute(id, mentionAttr, attrPtr, attrs, output);
+   }
+
+   return output;
+ }
+
+ /**
+  * Handles one attribute id: follows it when it is a pointer, collects it when it
+  * is a role/value leaf, and ignores it when it is a relation argument or unknown.
+  */
+ private void visitAttribute(String attrId,
+     HashMap<String, List<String>> mentionAttr,
+     HashMap<String, ArgumentInfo> attrPtr, HashMap<String, ArgumentInfo> attrs, List<ArgumentInfo> output) {
+
+   if (attrPtr.containsKey(attrId)) {
+     ArgumentInfo pointer = attrPtr.get(attrId);
+     if (!isRelationArgument(pointer.role)) {
+       getLeafAttributes(pointer.value, mentionAttr, attrPtr, attrs, output);
+     }
+   } else if (attrs.containsKey(attrId)) {
+     ArgumentInfo leaf = attrs.get(attrId);
+     if (!isRelationArgument(leaf.role)) {
+       output.add(leaf);
+     }
+   }
+ }
+
+ /**
+  * Tells whether a role names a relation argument, i.e. whether it is
+  * "Related_to" or "Argument" once the role name has been normalized.
+  *
+  * @param role role name as found in the knowtator file
+  * @return true for relation-argument roles, false otherwise
+  */
+ private boolean isRelationArgument(String role) {
+   String normalized = normalizeRoleName(role);
+   return normalized.equals("Related_to") || normalized.equals("Argument");
+ }
+
+ /**
+  * Copies one knowtator attribute onto the annotation.
+  *
+  * Only roles containing "_normalization" are considered; the prefix of the role
+  * selects which property of the annotation is set. Any other role is ignored.
+  *
+  * @param eMention annotation to update
+  * @param role     attribute role name from the knowtator file
+  * @param value    attribute value from the knowtator file
+  */
+ private void checkForAttrValue(IdentifiedAnnotation eMention, String role,
+     String value) {
+   if (!role.contains("_normalization")) {
+     return;
+   }
+
+   if (role.startsWith("conditional")) {
+     eMention.setConditional(Boolean.valueOf(value));
+   } else if (role.startsWith("generic")) {
+     eMention.setGeneric(Boolean.valueOf(value));
+   } else if (role.startsWith("negation_indicator")) {
+     // assumes that the string from Knowtator is exactly "negation_present"
+     if (value.equals("negation_present")) {
+       eMention.setPolarity(CONST.NE_POLARITY_NEGATION_PRESENT);
+     } else {
+       eMention.setPolarity(CONST.NE_POLARITY_NEGATION_ABSENT);
+     }
+   } else if (role.startsWith("subject")) {
+     // assumes that the strings from Knowtator are exactly what's in the type system
+     eMention.setSubject(value);
+   } else if (role.startsWith("uncertainty_indicator")) {
+     // assumes that the string from Knowtator is exactly "indicator_present"
+     if (value.equals("indicator_present")) {
+       eMention.setUncertainty(CONST.NE_UNCERTAINTY_PRESENT);
+     } else {
+       eMention.setUncertainty(CONST.NE_UNCERTAINTY_ABSENT);
+     }
+   }
+ }
+
+ /**
+  * Keeps only the mentions whose Knowtator schema type is a named-entity type.
+  * In principle there could be a parallel "filterToAttributesOnly".
+  *
+  * @param entityMentions mention id to its spans
+  * @param entityTypes    mention id to its Knowtator schema type
+  * @return a new map holding the entries of entityMentions whose type is a named entity
+  */
+ private HashMap<String, ArrayList<Span>> filterToNamedEntitiesOnly(
+     HashMap<String, ArrayList<Span>> entityMentions,
+     HashMap<String, String> entityTypes) {
+
+   HashMap<String, ArrayList<Span>> kept = new HashMap<String, ArrayList<Span>>();
+
+   for (Entry<String, String> typed : entityTypes.entrySet()) {
+     String mentionId = typed.getKey();
+     String schemaType = typed.getValue();
+
+     boolean isNamedEntity = schemaType.equals("Anatomical_site")
+         || schemaType.equals("Disease_Disorder")
+         || schemaType.equals("Lab")
+         || schemaType.equals("Medications")
+         || schemaType.equals("Procedure")
+         || schemaType.equals("Sign_symptom");
+     if (!isNamedEntity) {
+       continue;
+     }
+
+     // a type entry without spans is dropped
+     if (!entityMentions.containsKey(mentionId)) {
+       continue;
+     }
+     kept.put(mentionId, entityMentions.get(mentionId));
+   }
+
+   return kept;
+ }
+
+ /**
+  * Tells whether a single mention is a named entity that has spans.
+  *
+  * The type is compared case-insensitively. The previous implementation lower-cased
+  * the type and then compared it with mixed-case literals, so it never matched.
+  *
+  * @param entityMentions mention id to its spans
+  * @param typeKey        mention id to test
+  * @param typeValue      Knowtator schema type of that mention; may be null
+  * @return true if the type is a named-entity type and entityMentions has the mention id
+  */
+ private boolean filterToNamedEntitiesOnly(
+     HashMap<String, ArrayList<Span>> entityMentions,
+     String typeKey, String typeValue) {
+
+   // literal-first comparison also makes a null type simply "not a named entity"
+   boolean isNamedEntity = "Anatomical_site".equalsIgnoreCase(typeValue)
+       || "Disease_Disorder".equalsIgnoreCase(typeValue)
+       || "Lab".equalsIgnoreCase(typeValue)
+       || "Medications".equalsIgnoreCase(typeValue)
+       || "Procedure".equalsIgnoreCase(typeValue)
+       || "Sign_symptom".equalsIgnoreCase(typeValue);
+
+   return isNamedEntity && entityMentions.containsKey(typeKey);
+ }
+
+ /**
+  * Maps the suffixed role names Argument_CU and Related_to_CU to Argument and
+  * Related_to; every other role name is returned unchanged.
+  *
+  * This will not be necessary in the future when the data will be
+  * post-processed to remove _CU suffixes. Currently mipacq data does not
+  * have the suffixes and sharp data does.
+  *
+  * @param role role name as found in the data
+  * @return the role name without the _CU suffix for the two known roles
+  */
+ private static String normalizeRoleName(String role) {
+   String normalized = role;
+
+   if (role.equals("Argument_CU")) {
+     normalized = "Argument";
+   } else if (role.equals("Related_to_CU")) {
+     normalized = "Related_to";
+   }
+
+   return normalized;
+ }
+}
\ No newline at end of file
Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.cr;
+
+import java.util.HashSet;
+
+import com.google.common.base.Objects;
+
+/**
+ * Information about a relation that includes the info about the relation's arguments.
+ *
+ * @author dmitriy dligach
+ *
+ */
+public class RelationInfo {
+
+  public String id1;      // id of the first argument
+  public String id2;      // id of the second argument
+  public String role1;    // role of the first argument (e.g. Argument)
+  public String role2;    // role of the second argument (e.g. Related_to)
+  public String category; // relation type e.g. co_occurs_with
+
+  /**
+   * Stores the ids and roles of the two arguments of a relation and the relation type.
+   */
+  RelationInfo(String id1, String id2, String role1, String role2, String category) {
+    this.id1 = id1;
+    this.id2 = id2;
+    this.role1 = role1;
+    this.role2 = role2;
+    this.category = category;
+  }
+
+  @Override
+  public String toString() {
+    return String.format("<%s, %s, %s, %s, %s>", id1, id2, role1, role2, category);
+  }
+
+  /**
+   * Returns true if two relation instances (represented as RelationInfo objects)
+   * have the same arguments; roles and category are deliberately not compared.
+   * Useful for debugging mipacq data which may contain duplicate relation instances.
+   *
+   * A null argument, an object of another class, or null ids never cause an
+   * exception: the first two give false and null ids are compared null-safely.
+   */
+  @Override
+  public boolean equals(Object object) {
+
+    if (this == object) {
+      return true;
+    }
+    if (object == null || this.getClass() != object.getClass()) {
+      return false;
+    }
+
+    RelationInfo other = (RelationInfo) object;
+    return Objects.equal(this.id1, other.id1) && Objects.equal(this.id2, other.id2);
+  }
+
+  /**
+   * Hash code must match equals() method, so it is built from the two ids only.
+   */
+  @Override
+  public int hashCode() {
+    return Objects.hashCode(this.id1, this.id2);
+  }
+
+  /**
+   * Small manual demonstration of equals() and hashCode() with a HashSet.
+   */
+  public static void main(String[] args) {
+
+    RelationInfo ri1 = new RelationInfo("1", "2", "Argument", "Related_to", "location_of");
+    RelationInfo ri2 = new RelationInfo("1", "2", "zzzzzzzz", "xxxxxxxxxx", "yyyyyyyyyyy");
+    RelationInfo ri3 = new RelationInfo("1", "2", "kkkkkkkk", "llllllllll", "mmmmmmmmmmm");
+
+    System.out.println(ri1.equals(ri2));
+
+    HashSet<RelationInfo> uniqueRelations = new HashSet<RelationInfo>();
+
+    System.out.println(ri1.hashCode() + "\t" + ri2.hashCode());
+
+    uniqueRelations.add(ri1);
+    uniqueRelations.add(ri2);
+
+    System.out.println(uniqueRelations);
+
+    System.out.println(uniqueRelations.contains(ri1));
+    System.out.println(uniqueRelations.contains(ri2));
+    System.out.println(uniqueRelations.contains(ri3));
+  }
+}
\ No newline at end of file
Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.cr;
+
+
+/**
+ * Represents span of a named entity
+ *
+ * @author dmitriy dligach
+ *
+ */
+public class Span {
+
+  public int start; // start offset of the span
+  public int end;   // end offset of the span
+
+  public Span(int start, int end) {
+    this.start = start;
+    this.end = end;
+  }
+
+  /**
+   * Two spans are equal when both their start and end offsets are equal.
+   */
+  @Override
+  public boolean equals(Object object) {
+
+    boolean isEqual = false;
+
+    if (object instanceof Span) {
+      Span span = (Span) object;
+      isEqual = ((this.start == span.start) && (this.end == span.end));
+    }
+
+    return isEqual;
+  }
+
+  /**
+   * Hash code consistent with equals(): equal spans get the same hash code,
+   * so spans behave correctly as HashSet elements and HashMap keys.
+   */
+  @Override
+  public int hashCode() {
+    return 31 * start + end;
+  }
+
+  @Override
+  public String toString() {
+    return String.format("%d -- %d", start, end);
+  }
+}
\ No newline at end of file
Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.cr;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+
+import org.jdom.Document;
+import org.jdom.Element;
+
+public class XMLReader {
+
+ static String[] attrHeadings = {"booleanSlotMention","stringSlotMention"}; // default element names scanned by getAttributes(Document)
+ private static boolean VERBOSE = true; // when true, getAttributes prints a warning to stderr for a repeated id
+
+ /**
+ * Get spans of named entity annotations indexed on knowtator mention id
+ */
+ public static HashMap<String, ArrayList<Span>> getEntityMentions(Document document) {
+
+ // key: mention id, value: list of spans (need a list to handle disjoint spans)
+ HashMap<String, ArrayList<Span>> entityMentions = new HashMap<String, ArrayList<Span>>();
+
+ Element elementRoot = document.getRootElement();
+ List<?> annotations = elementRoot.getChildren("annotation");
+
+ for (int i = 0; i < annotations.size(); i++) {
+ Element elementAnnotation = (Element) annotations.get(i);
+
+ List<?> elementSpans = elementAnnotation.getChildren("span");
+
+ if(elementSpans.size() == 0) {
+ continue; // spanless annotation, e.g. a relation; there should be no spannedText
+ }
+
+ ArrayList<Span> spans = new ArrayList<Span>();
+ for(int j = 0; j < elementSpans.size(); j++) {
+ Element elementSpan = (Element) elementSpans.get(j);
+
+ String start = elementSpan.getAttributeValue("start");
+ String end = elementSpan.getAttributeValue("end");
+
+ Span span = new Span(Integer.parseInt(start), Integer.parseInt(end));
+ spans.add(span);
+ }
+
+ String mentionId = elementAnnotation.getChild("mention").getAttributeValue("id");
+
+ entityMentions.put(mentionId, spans);
+ }
+ return entityMentions;
+ }
+
+ /**
+ * Maps the id of every "classMention" element to the text of its
+ * "mentionClass" child (e.g. "sign_symptom").
+ *
+ * @param document parsed knowtator XML document
+ * @return map from mention id to mention class text
+ */
+ public static HashMap<String, String> getEntityTypes(Document document) {
+
+ // key: mention id, value: text of the mentionClass child
+ HashMap<String, String> typeByMentionId = new HashMap<String, String>();
+
+ for (Object classMentionObject : document.getRootElement().getChildren("classMention")) {
+ Element mention = (Element) classMentionObject;
+ typeByMentionId.put(mention.getAttributeValue("id"), mention.getChildText("mentionClass"));
+ }
+
+ return typeByMentionId;
+ }
+
+ /**
+ * Slot-mention ids referenced by each "classMention" element, indexed on
+ * the classMention id. Every classMention gets an entry (possibly an empty
+ * list); no filtering by entity type happens here, so callers that want
+ * only named entities must filter the result themselves.
+ *
+ * @param document parsed knowtator XML document
+ * @return map from classMention id to the ids of its "hasSlotMention" children
+ */
+ public static HashMap<String, List<String>> getEntityAttributes(Document document) {
+
+ // key: mention id, value: ids of the slot mentions attached to it
+ HashMap<String, List<String>> entityAttr = new HashMap<String, List<String>>();
+
+ Element root = document.getRootElement();
+ List<?> classMentions = root.getChildren("classMention");
+
+ for (Object classMentionObject : classMentions) {
+ Element classMention = (Element) classMentionObject;
+ List<String> slotIds = new ArrayList<String>();
+ for (Object slotMention : classMention.getChildren("hasSlotMention")) {
+ slotIds.add(((Element) slotMention).getAttributeValue("id"));
+ }
+
+ entityAttr.put(classMention.getAttributeValue("id"), slotIds);
+ }
+ return entityAttr;
+ }
+
+ /**
+ * Convenience overload that scans the default slot-mention headings (attrHeadings).
+ */
+ public static HashMap<String, ArgumentInfo> getAttributes(Document document) {
+ return XMLReader.getAttributes(document, XMLReader.attrHeadings);
+ }
+
+ /**
+ * Reads the slot mentions stored under the given element names and indexes
+ * them on slot-mention id.
+ *
+ * For every child of the root whose element name is one of the headings,
+ * the value comes from the child element named heading + "Value" and the
+ * attribute name from the "mentionSlot" child (passed through normalizeName).
+ * If an id occurs more than once the last occurrence wins; with VERBOSE set
+ * a warning is also written to stderr.
+ *
+ * @param document parsed knowtator XML document
+ * @param headings element names to scan, e.g. "booleanSlotMention"
+ * @return map from slot-mention id to its value and attribute name
+ * @throws NullPointerException if a slot mention lacks its value child or
+ * its "mentionSlot" child
+ */
+ public static HashMap<String, ArgumentInfo> getAttributes(Document document, String[] headings) {
+
+ // key: slot-mention id, value: attribute value together with attribute name
+ HashMap<String, ArgumentInfo> entityAttr = new HashMap<String,ArgumentInfo>();
+
+ Element root = document.getRootElement();
+
+ // read every slot mention found under each of the requested headings
+ for (String heading : headings) {
+ for (Object slotMentionObject : root.getChildren(heading)) {
+ Element slotMention = (Element) slotMentionObject;
+
+ String id = slotMention.getAttributeValue("id");
+ String value = slotMention.getChild(heading + "Value").getAttributeValue("value");
+ String attr = slotMention.getChild("mentionSlot").getAttributeValue("id");
+
+ // a repeated id is only reported; the entry is stored either way
+ if (VERBOSE && entityAttr.containsKey(id)) {
+ System.err.println("WARNING: found more than one attribute in an attribute mention");
+ }
+ entityAttr.put(id, new ArgumentInfo(value, normalizeName(attr)));
+ }
+ }
+
+ // NOTE(review): classMentions that encode relations are not read here;
+ // getRelations(Document) handles those separately.
+
+ return entityAttr;
+ }
+
+ /**
+ * Builds the list of relations described by the document: classMentions
+ * with at least two hasSlotMention children whose complexSlotMention
+ * arguments resolve to exactly one pair (see addRelation).
+ */
+ public static ArrayList<RelationInfo> getRelations(Document document) {
+
+ Element root = document.getRootElement();
+
+ // key: complexSlotMention id, value: its value together with its normalized role
+ HashMap<String, ArgumentInfo> argumentsById = new HashMap<String, ArgumentInfo>();
+ for (Object slotObject : root.getChildren("complexSlotMention")) {
+ Element slot = (Element) slotObject;
+
+ String slotId = slot.getAttributeValue("id");
+ String target = slot.getChild("complexSlotMentionValue").getAttributeValue("value");
+ String slotRole = slot.getChild("mentionSlot").getAttributeValue("id"); // e.g. "Related_to"
+
+ argumentsById.put(slotId, new ArgumentInfo(target, normalizeName(slotRole)));
+ }
+
+ // classMentions carry the relation type; their hasSlotMention children name the arguments
+ ArrayList<RelationInfo> found = new ArrayList<RelationInfo>();
+ for (Object mentionObject : root.getChildren("classMention")) {
+ Element mention = (Element) mentionObject;
+ List<?> slotRefs = mention.getChildren("hasSlotMention");
+
+ if (slotRefs.size() < 2) {
+ continue; // too few slots to form a relation
+ }
+ addRelation(found, slotRefs, argumentsById, mention.getChildText("mentionClass"));
+ }
+ return found;
+ }
+
+ /**
+ * Appends one relation to the list when exactly two of the given
+ * hasSlotMention elements refer to known complexSlotMentions whose role is
+ * "Argument" or "Related_to"; otherwise the list is left untouched.
+ * The first qualifying slot supplies argument 1, the second argument 2.
+ */
+ private static void addRelation(ArrayList<RelationInfo> relations, List<?> hasSlotMentions,
+ HashMap<String, ArgumentInfo> hashComplexSlotMentions, String relationType) {
+
+ // Keep only real arguments: a classMention may reference extra slots, and
+ // in sharp data (unlike mipacq) one of them can be a negation attribute.
+ ArrayList<String> argumentIds = new ArrayList<String>();
+ for (Object slotRef : hasSlotMentions) {
+ String slotId = ((Element) slotRef).getAttributeValue("id");
+ if (!hashComplexSlotMentions.containsKey(slotId)) {
+ continue;
+ }
+
+ String slotRole = hashComplexSlotMentions.get(slotId).role;
+ if (slotRole.equals("Argument") || slotRole.equals("Related_to")) {
+ argumentIds.add(slotId);
+ }
+ }
+
+ // a relation needs exactly two arguments
+ if (argumentIds.size() == 2) {
+ ArgumentInfo first = hashComplexSlotMentions.get(argumentIds.get(0));
+ ArgumentInfo second = hashComplexSlotMentions.get(argumentIds.get(1));
+ relations.add(new RelationInfo(first.value, second.value, first.role, second.role, relationType));
+ }
+ }
+
+ /**
+ * Strips the "_CU" suffix from the two argument role names: "Argument_CU"
+ * becomes "Argument" and "Related_to_CU" becomes "Related_to". Any other
+ * name is returned unchanged.
+ *
+ * Needed because sharp data carries the suffix while mipacq data does
+ * not; it can go away once the data is post-processed to drop the suffix.
+ *
+ * @param role slot name as found in the XML; must not be null
+ * @return the name without the "_CU" suffix
+ */
+ private static String normalizeName(String role) {
+ String normalized = role;
+ if(role.equals("Argument_CU")) {
+ normalized = "Argument";
+ } else if(role.equals("Related_to_CU")) {
+ normalized = "Related_to";
+ }
+ return normalized;
+ }
+}
Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.assertion.cr;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.collection.CollectionReader_ImplBase;
+import org.apache.uima.resource.ResourceConfigurationException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import org.xml.sax.SAXException;
+
+/**
+ * A simple collection reader that reads CASes in XMI format from a directory in the filesystem.
+ */
+public class XmiCollectionReaderCtakes extends CollectionReader_ImplBase {
+ /**
+ * Name of configuration parameter that must be set to the path of a directory containing the XMI
+ * files.
+ */
+ public static final String PARAM_INPUTDIR = "InputDirectory";
+
+ /**
+ * Name of the optional configuration parameter that indicates whether
+ * execution fails when an encountered type is unknown (defaults to true)
+ */
+ public static final String PARAM_FAILUNKNOWN = "FailOnUnknownType";
+
+ private Boolean mFailOnUnknownType;
+ private ArrayList<File> mFiles;
+ private int mCurrentIndex;
+
+ /**
+ * Reads the configuration and collects the .xmi files of the input directory.
+ * @throws ResourceInitializationException if the input directory parameter is missing,
+ * does not name an existing directory, or the directory cannot be listed
+ * @see org.apache.uima.collection.CollectionReader_ImplBase#initialize()
+ */
+ public void initialize() throws ResourceInitializationException {
+ mFailOnUnknownType = (Boolean) getConfigParameterValue(PARAM_FAILUNKNOWN);
+ if (null == mFailOnUnknownType) {
+ mFailOnUnknownType = true; // default to true if not specified
+ }
+ String inputDir = (String) getConfigParameterValue(PARAM_INPUTDIR);
+ if (inputDir == null) {
+ throw new ResourceInitializationException(
+ new IllegalArgumentException("Missing required configuration parameter " + PARAM_INPUTDIR));
+ }
+ File directory = new File(inputDir.trim());
+ mCurrentIndex = 0;
+
+ // if input directory does not exist or is not a directory, throw exception
+ if (!directory.exists() || !directory.isDirectory()) {
+ throw new ResourceInitializationException(ResourceConfigurationException.DIRECTORY_NOT_FOUND,
+ new Object[] { PARAM_INPUTDIR, this.getMetaData().getName(), directory.getPath() });
+ }
+
+ // listFiles() returns null on an I/O error even for an existing directory
+ File[] files = directory.listFiles();
+ if (files == null) {
+ throw new ResourceInitializationException(new IOException("Cannot list " + directory.getPath()));
+ }
+ mFiles = new ArrayList<File>();
+ for (File file : files) {
+ if (!file.isDirectory() && file.getName().endsWith(".xmi")) {
+ mFiles.add(file);
+ }
+ }
+ }
+
+ /** @see org.apache.uima.collection.CollectionReader#hasNext() */
+ public boolean hasNext() {
+ return mCurrentIndex < mFiles.size();
+ }
+
+ /** @see org.apache.uima.collection.CollectionReader#getNext(org.apache.uima.cas.CAS) */
+ public void getNext(CAS aCAS) throws IOException, CollectionException {
+ File currentFile = mFiles.get(mCurrentIndex++);
+ FileInputStream inputStream = new FileInputStream(currentFile);
+ try {
+ XmiCasDeserializer.deserialize(inputStream, aCAS, ! mFailOnUnknownType); // third argument is "lenient"
+ } catch (SAXException e) {
+ throw new CollectionException(e);
+ } finally {
+ inputStream.close();
+ }
+ }
+
+ /** @see org.apache.uima.collection.base_cpm.BaseCollectionReader#close() */
+ public void close() throws IOException {
+ }
+
+ /** @see org.apache.uima.collection.base_cpm.BaseCollectionReader#getProgress() */
+ public Progress[] getProgress() {
+ return new Progress[] { new ProgressImpl(mCurrentIndex, mFiles.size(), Progress.ENTITIES) };
+ }
+}
Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java?rev=1411758&r1=1411757&r2=1411758&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java Tue Nov 20 17:44:30 2012
@@ -25,6 +25,8 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -53,7 +55,14 @@ import org.cleartk.eval.AnnotationStatis
import org.cleartk.eval.Evaluation_ImplBase;
import org.cleartk.util.Options_ImplBase;
import org.kohsuke.args4j.Option;
+import org.kohsuke.args4j.spi.BooleanOptionHandler;
import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.factory.AggregateBuilder;
import org.uimafit.factory.AnalysisEngineFactory;
@@ -82,7 +91,7 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.textsem.Modifier;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
-public class AssertionEvalBasedOnModifier extends Evaluation_ImplBase<File, AnnotationStatistics> {
+public class AssertionEvalBasedOnModifier extends Evaluation_ImplBase<File, Map<String, AnnotationStatistics>> {
private static Logger logger = Logger.getLogger(AssertionEvalBasedOnModifier.class);
@@ -105,18 +114,58 @@ public class AssertionEvalBasedOnModifie
required = true)
public File modelsDirectory;
+ @Option(
+ name = "--run-polarity",
+ usage = "specify whether polarity processing should be run (true or false). default: true",
+ required = false)
+ public boolean runPolarity = true;
+
+ @Option(
+ name = "--run-conditional",
+ usage = "specify whether conditional processing should be run (true or false). default: true",
+ required = false)
+ public boolean runConditional = true;
+
+ @Option(
+ name = "--run-uncertainty",
+ usage = "specify whether uncertainty processing should be run (true or false). default: true",
+ required = false)
+ public boolean runUncertainty = true;
+
+ @Option(
+ name = "--run-subject",
+ usage = "specify whether subject processing should be run (true or false). default: true",
+ required = false,
+ handler=BooleanOptionHandler.class)
+ public boolean runSubject = true;
+
+ @Option(
+ name = "--run-generic",
+ usage = "specify whether generic processing should be run (true or false). default: true",
+ required = false)
+ public boolean runGeneric = true;
+
}
+
+ protected ArrayList<String> annotationTypes;
private Class<? extends AssertionCleartkAnalysisEngine> classifierAnnotatorClass;
private Class<? extends DataWriterFactory<String>> dataWriterFactoryClass;
+ protected static Options options = new Options();
public static void main(String[] args) throws Exception {
- Options options = new Options();
+ //Options options = new Options();
options.parseOptions(args);
+
+ System.err.println("forcing skipping of subject processing!!!");
+ options.runSubject = false;
+ System.err.println("forcing skipping of generic processing!!!");
+ options.runGeneric = false;
+ printOptionsForDebugging(options);
List<File> trainFiles = Arrays.asList(options.trainDirectory.listFiles());
//File modelsDir = new File("models/modifier");
File modelsDir = options.modelsDirectory;
@@ -134,9 +183,17 @@ public class AssertionEvalBasedOnModifie
Class<? extends AssertionCleartkAnalysisEngine> annotatorClass = AssertionCleartkAnalysisEngine.class;
+ //String [] annotationTypes = { "polarity", "conditional", "uncertainty", "subject", "generic" };
+ ArrayList<String> annotationTypes = new ArrayList<String>();
+ if (options.runPolarity) { annotationTypes.add("polarity"); }
+ if (options.runConditional) { annotationTypes.add("conditional"); }
+ if (options.runUncertainty) { annotationTypes.add("uncertainty"); }
+ if (options.runSubject) { annotationTypes.add("subject"); }
+ if (options.runGeneric) { annotationTypes.add("generic"); }
AssertionEvalBasedOnModifier evaluation = new AssertionEvalBasedOnModifier(
modelsDir,
+ annotationTypes,
annotatorClass,
dataWriterFactoryClass
);
@@ -164,19 +221,26 @@ public class AssertionEvalBasedOnModifie
if(options.testDirectory == null) {
// run n-fold cross-validation
- List<AnnotationStatistics> foldStats = evaluation.crossValidation(trainFiles, 2);
+ List<Map<String, AnnotationStatistics>> foldStats = evaluation.crossValidation(trainFiles, 2);
//AnnotationStatistics overallStats = AnnotationStatistics.addAll(foldStats);
- AnnotationStatistics overallStats = new AnnotationStatistics();
- for (AnnotationStatistics singleFoldStats : foldStats)
+ Map<String, AnnotationStatistics> overallStats = new TreeMap<String, AnnotationStatistics>();
+
+ for (String currentAnnotationType : annotationTypes)
+ {
+ AnnotationStatistics currentAnnotationStatistics = new AnnotationStatistics();
+ overallStats.put(currentAnnotationType, currentAnnotationStatistics);
+ }
+ for (Map<String, AnnotationStatistics> singleFoldMap : foldStats)
{
- overallStats.addAll(singleFoldStats);
+ for (String currentAnnotationType : annotationTypes)
+ {
+ AnnotationStatistics currentFoldStatistics = singleFoldMap.get(currentAnnotationType);
+ overallStats.get(currentAnnotationType).addAll(currentFoldStatistics);
+ }
}
- System.err.println("overall:");
- System.err.print(overallStats);
- System.err.println(overallStats.confusions());
- System.err.println();
-
+ AssertionEvalBasedOnModifier.printScore(overallStats, "CROSS FOLD OVERALL");
+
} else {
// train on the entire training set and evaluate on the test set
List<File> testFiles = Arrays.asList(options.testDirectory.listFiles());
@@ -185,21 +249,65 @@ public class AssertionEvalBasedOnModifie
evaluation.train(trainCollectionReader, modelsDir);
CollectionReader testCollectionReader = evaluation.getCollectionReader(testFiles);
- AnnotationStatistics stats = evaluation.test(testCollectionReader, modelsDir);
- return;
+ Map<String, AnnotationStatistics> stats = evaluation.test(testCollectionReader, modelsDir);
+
+ AssertionEvalBasedOnModifier.printScore(stats, modelsDir.getAbsolutePath());
}
+ System.out.println("Finished.");
+
+ }
+
+ private static void printOptionsForDebugging(Options options)
+ {
+ // Directories may be unset (testDirectory is null when main cross-validates),
+ // so print "null" for them instead of failing on getAbsolutePath().
+ File[] dirs = { options.trainDirectory, options.testDirectory, options.modelsDirectory };
+ String[] paths = new String[dirs.length];
+ for (int i = 0; i < dirs.length; i++) {
+ paths[i] = (dirs[i] == null) ? null : dirs[i].getAbsolutePath();
+ }
+ System.out.format(
+ "training dir: %s%n" +
+ "test dir: %s%n" +
+ "model dir: %s%n" +
+ "run polarity: %b%n" +
+ "run conditional: %b%n" +
+ "run uncertainty: %b%n" +
+ "run subject: %b%n" +
+ "run generic: %b%n" +
+ "%n%n",
+ paths[0], paths[1], paths[2],
+ options.runPolarity, options.runConditional, options.runUncertainty, options.runSubject, options.runGeneric);
+ }
+
+public static void printScore(Map<String, AnnotationStatistics> map, String directory)
+ {
+ // Prints one report per assertion type, in the iteration order of the map:
+ // the label passed as "directory", the upper-cased type name, then the statistics.
+ final String reportFormat = "directory: \"%s\"; assertion type: %s%n%n%s%n%n";
+
+ for (Map.Entry<String, AnnotationStatistics> entry : map.entrySet())
+ {
+ String typeLabel = entry.getKey().toUpperCase();
+ String renderedStats = entry.getValue().toString();
+ System.out.format(reportFormat, directory, typeLabel, renderedStats);
+ }
+
+ }
private String[] trainingArguments;
public AssertionEvalBasedOnModifier(
File directory,
+ ArrayList<String> annotationTypes,
Class<? extends AssertionCleartkAnalysisEngine> classifierAnnotatorClass,
Class<? extends DataWriterFactory<String>> dataWriterFactoryClass,
String... trainingArguments
) {
super(directory);
+
+ this.annotationTypes = annotationTypes;
this.classifierAnnotatorClass = classifierAnnotatorClass;
this.dataWriterFactoryClass = dataWriterFactoryClass;
@@ -216,7 +324,7 @@ public class AssertionEvalBasedOnModifie
}
return CollectionReaderFactory.createCollectionReader(
XMIReader.class,
- TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("../common-type-system/desc/common_type_system.xml"),
+ TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(),
XMIReader.PARAM_FILES,
paths);
}
@@ -244,17 +352,80 @@ public class AssertionEvalBasedOnModifie
AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
builder.add(assertionAttributeClearerAnnotator);
- AnalysisEngineDescription assertionAnnotator = AnalysisEngineFactory.createPrimitiveDescription(AssertionCleartkAnalysisEngine.class); //, this.additionalParamemters);
- ConfigurationParameterFactory.addConfigurationParameters(
- assertionAnnotator,
- AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
- AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
- CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
- this.dataWriterFactoryClass.getName(),
- DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
- directory.getPath()
- );
- builder.add(assertionAnnotator);
+ if (options.runPolarity)
+ {
+ AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ polarityAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+ this.dataWriterFactoryClass.getName(),
+ DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ new File(directory, "polarity").getPath()
+ );
+ builder.add(polarityAnnotator);
+ }
+
+ if (options.runConditional)
+ {
+ AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ conditionalAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+ this.dataWriterFactoryClass.getName(),
+ DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ new File(directory, "conditional").getPath()
+ );
+ builder.add(conditionalAnnotator);
+ }
+
+ if (options.runUncertainty)
+ {
+ AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ uncertaintyAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+ this.dataWriterFactoryClass.getName(),
+ DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ new File(directory, "uncertainty").getPath()
+ );
+ builder.add(uncertaintyAnnotator);
+ }
+
+ if (options.runSubject)
+ {
+ AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ subjectAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+ this.dataWriterFactoryClass.getName(),
+ DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ new File(directory, "subject").getPath()
+ );
+ builder.add(subjectAnnotator);
+ }
+
+ if (options.runGeneric)
+ {
+ AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ genericAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+ this.dataWriterFactoryClass.getName(),
+ DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ new File(directory, "generic").getPath()
+ );
+ builder.add(genericAnnotator);
+ }
/*
AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
@@ -274,12 +445,16 @@ public class AssertionEvalBasedOnModifie
SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
HideOutput hider = new HideOutput();
- JarClassifierBuilder.trainAndPackage(directory, this.trainingArguments);
+ for (String currentAssertionAttribute : annotationTypes)
+ {
+ File currentDirectory = new File(directory, currentAssertionAttribute);
+ JarClassifierBuilder.trainAndPackage(currentDirectory, trainingArguments);
+ }
hider.restoreOutput();
}
@Override
- protected AnnotationStatistics test(CollectionReader collectionReader, File directory)
+ protected Map<String, AnnotationStatistics> test(CollectionReader collectionReader, File directory)
throws Exception {
// AnalysisEngine classifierAnnotator = AnalysisEngineFactory.createPrimitive(AssertionCleartkAnalysisEngine.getDescription(
// GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
@@ -293,21 +468,107 @@ public class AssertionEvalBasedOnModifie
AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
builder.add(assertionAttributeClearerAnnotator);
- AnalysisEngineDescription assertionAnnotator = AnalysisEngineFactory.createPrimitiveDescription(AssertionCleartkAnalysisEngine.class); //, this.additionalParamemters);
- ConfigurationParameterFactory.addConfigurationParameters(
- assertionAnnotator,
- AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
- AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
- GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
- new File(directory, "model.jar").getPath()
- );
- builder.add(assertionAnnotator);
+ if (options.runPolarity)
+ {
+ AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ polarityAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ new File(new File(directory, "polarity"), "model.jar").getPath()
+ );
+ builder.add(polarityAnnotator);
+ }
+
+ if (options.runConditional)
+ {
+ AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ conditionalAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ new File(new File(directory, "conditional"), "model.jar").getPath()
+ );
+ builder.add(conditionalAnnotator);
+ }
+
+ if (options.runUncertainty)
+ {
+ AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ uncertaintyAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ new File(new File(directory, "uncertainty"), "model.jar").getPath()
+ );
+ builder.add(uncertaintyAnnotator);
+ }
+
+ if (options.runSubject)
+ {
+ AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ subjectAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ new File(new File(directory, "subject"), "model.jar").getPath()
+ );
+ builder.add(subjectAnnotator);
+ }
+
+ if (options.runGeneric)
+ {
+ AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //, this.additionalParamemters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ genericAnnotator,
+ AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+ AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ new File(new File(directory, "generic"), "model.jar").getPath()
+ );
+ builder.add(genericAnnotator);
+ }
//SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
AnalysisEngineDescription aggregateDescription = builder.createAggregateDescription();
AnalysisEngine aggregate = builder.createAggregate();
- AnnotationStatistics stats = new AnnotationStatistics();
+ AnnotationStatistics polarityStats = new AnnotationStatistics();
+ AnnotationStatistics conditionalStats = new AnnotationStatistics();
+ AnnotationStatistics uncertaintyStats = new AnnotationStatistics();
+ AnnotationStatistics subjectStats = new AnnotationStatistics();
+ AnnotationStatistics genericStats = new AnnotationStatistics();
+
+ Map<String, AnnotationStatistics> map = new TreeMap<String, AnnotationStatistics>();
+ if (options.runPolarity)
+ {
+ map.put("polarity", polarityStats);
+ }
+
+ if (options.runConditional)
+ {
+ map.put("conditional", conditionalStats);
+ }
+
+ if (options.runUncertainty)
+ {
+ map.put("uncertainty", uncertaintyStats);
+ }
+
+ if (options.runSubject)
+ {
+ map.put("subject", subjectStats);
+ }
+
+ if (options.runGeneric)
+ {
+ map.put("generic", genericStats);
+ }
+
for (JCas jCas : new JCasIterable(collectionReader, aggregate)) {
JCas goldView;
try {
@@ -315,21 +576,62 @@ public class AssertionEvalBasedOnModifie
} catch (CASException e) {
throw new AnalysisEngineProcessException(e);
}
- Collection<IdentifiedAnnotation> goldEntities = new ArrayList<IdentifiedAnnotation>();
- goldEntities.addAll(JCasUtil.select(goldView, EntityMention.class));
- goldEntities.addAll(JCasUtil.select(goldView, EventMention.class));
-
- Collection<IdentifiedAnnotation> systemEntities = new ArrayList<IdentifiedAnnotation>();
- systemEntities.addAll(JCasUtil.select(jCas, EntityMention.class));
- systemEntities.addAll(JCasUtil.select(jCas, EventMention.class));
-
- stats.add(goldEntities, systemEntities,
- AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
- AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("polarity"));
- }
- System.err.println(directory.getName() + ":");
- System.err.println(stats);
- return stats;
+
+ String documentId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+ System.out.format("document id: %s%n", documentId);
+
+ Collection<IdentifiedAnnotation> goldEntitiesAndEvents = new ArrayList<IdentifiedAnnotation>();
+ Collection<EntityMention> goldEntities = JCasUtil.select(goldView, EntityMention.class);
+ goldEntitiesAndEvents.addAll(goldEntities);
+ Collection<EventMention> goldEvents = JCasUtil.select(goldView, EventMention.class);
+ goldEntitiesAndEvents.addAll(goldEvents);
+ System.out.format("gold entities: %d%ngold events: %d%n%n", goldEntities.size(), goldEvents.size());
+
+ Collection<IdentifiedAnnotation> systemEntitiesAndEvents = new ArrayList<IdentifiedAnnotation>();
+ Collection<EntityMention> systemEntities = JCasUtil.select(jCas, EntityMention.class);
+ systemEntitiesAndEvents.addAll(systemEntities);
+ Collection<EventMention> systemEvents = JCasUtil.select(jCas, EventMention.class);
+ systemEntitiesAndEvents.addAll(systemEvents);
+ System.out.format("system entities: %d%nsystem events: %d%n%n", systemEntities.size(), systemEvents.size());
+
+ if (options.runPolarity)
+ {
+ polarityStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("polarity"));
+ }
+
+ if (options.runConditional)
+ {
+ conditionalStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("conditional"));
+ }
+
+ if (options.runUncertainty)
+ {
+ uncertaintyStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("uncertainty"));
+ }
+
+ if (options.runSubject)
+ {
+ subjectStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("subject"));
+ }
+
+ if (options.runGeneric)
+ {
+ genericStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+ AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("generic"));
+ }
+
+ }
+
+ return map;
}
public static final String GOLD_VIEW_NAME = "GoldView";
@@ -413,134 +715,134 @@ public class AssertionEvalBasedOnModifie
}
}
- public static class ReplaceGoldEntityMentionsAndModifiersWithCTakes extends
- JCasAnnotator_ImplBase
- {
-
- @Override
- public void process(JCas jCas) throws AnalysisEngineProcessException
- {
- JCas goldView, systemView;
- try
- {
- goldView = jCas.getView(GOLD_VIEW_NAME);
- systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
- } catch (CASException e)
- {
- throw new AnalysisEngineProcessException(e);
- }
-
- // remove manual EntityMentions and Modifiers from gold view
- List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
- goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
- goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
- for (IdentifiedAnnotation goldMention : goldMentions)
- {
- goldMention.removeFromIndexes();
- }
-
- // copy cTAKES EntityMentions and Modifiers to gold view
- List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
- cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
- cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
- CasCopier copier = new CasCopier(systemView.getCas(), goldView.getCas());
- for (IdentifiedAnnotation cTakesMention : cTakesMentions)
- {
- Annotation copy = (Annotation) copier.copyFs(cTakesMention);
- Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
- copy.setFeatureValue(sofaFeature, goldView.getSofa());
- copy.addToIndexes();
- }
-
- // replace gold EntityMentions and Modifiers in relations with cTAKES ones
- List<BinaryTextRelation> relations = new ArrayList<BinaryTextRelation>();
- relations.addAll(JCasUtil.select(goldView, BinaryTextRelation.class));
- for (BinaryTextRelation relation : relations)
- {
-
- // attempt to replace the gold RelationArguments with system ones
- int replacedArgumentCount = 0;
- for (RelationArgument relArg : Arrays.asList(relation.getArg1(),
- relation.getArg2()))
- {
- Annotation goldArg = relArg.getArgument();
- Class<? extends Annotation> argClass = goldArg.getClass();
-
- // find all annotations covered by the gold argument and of the same
- // class (these should
- // be the ones copied over from the cTAKES output earlier)
- List<? extends Annotation> systemArgs = JCasUtil.selectCovered(
- goldView, argClass, goldArg);
-
- // no ctakes annotation found
- if (systemArgs.size() == 0)
- {
- String word = "no";
- String className = argClass.getSimpleName();
- String argText = goldArg.getCoveredText();
- String message = String.format("%s %s for \"%s\"", word, className,
- argText);
- this.getContext().getLogger().log(Level.FINE, message);
- continue;
- }
-
- // if there's exactly one annotation, replace the gold one with that
- if (systemArgs.size() == 1)
- {
- relArg.setArgument(systemArgs.get(0));
- replacedArgumentCount += 1;
- }
-
- else
- {
- // multiple ctakes arguments found; look for one that matches
- // exactly
- // e.g. gold: "right breast", ctakes: "right breast", "breast"
- for (Annotation systemArg : systemArgs)
- {
- String goldArgText = goldArg.getCoveredText();
- String systemArgText = systemArg.getCoveredText();
- if (systemArgText.equals(goldArgText))
- {
- relArg.setArgument(systemArg);
- replacedArgumentCount += 1;
- }
- }
-
- if (replacedArgumentCount < 1)
- {
- // issue a warning message
- String word = "multiple";
- String className = argClass.getSimpleName();
- String argText = goldArg.getCoveredText();
- String message = String.format("%s %s for \"%s\"", word,
- className, argText);
- this.getContext().getLogger().log(Level.FINE, message);
-
- System.out.println("gold argument: " + goldArg.getCoveredText());
- System.out.println("gold type: "
- + ((IdentifiedAnnotation) goldArg).getTypeID());
- for (Annotation systemArg : systemArgs)
- {
- System.out.println("ctakes argument: "
- + systemArg.getCoveredText());
- System.out.println("ctakes type: "
- + ((IdentifiedAnnotation) systemArg).getTypeID());
- }
- System.out.println();
- }
- }
- }
-
- // if replacements were not found for both arguments, remove the
- // relation
- if (replacedArgumentCount < 2)
- {
- relation.removeFromIndexes();
- }
- }
- }
- }
+// public static class ReplaceGoldEntityMentionsAndModifiersWithCTakes extends
+// JCasAnnotator_ImplBase
+// {
+//
+// @Override
+// public void process(JCas jCas) throws AnalysisEngineProcessException
+// {
+// JCas goldView, systemView;
+// try
+// {
+// goldView = jCas.getView(GOLD_VIEW_NAME);
+// systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+// } catch (CASException e)
+// {
+// throw new AnalysisEngineProcessException(e);
+// }
+//
+// // remove manual EntityMentions and Modifiers from gold view
+// List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
+// goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
+// goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
+// for (IdentifiedAnnotation goldMention : goldMentions)
+// {
+// goldMention.removeFromIndexes();
+// }
+//
+// // copy cTAKES EntityMentions and Modifiers to gold view
+// List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
+// cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
+// cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
+// CasCopier copier = new CasCopier(systemView.getCas(), goldView.getCas());
+// for (IdentifiedAnnotation cTakesMention : cTakesMentions)
+// {
+// Annotation copy = (Annotation) copier.copyFs(cTakesMention);
+// Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
+// copy.setFeatureValue(sofaFeature, goldView.getSofa());
+// copy.addToIndexes();
+// }
+//
+// // replace gold EntityMentions and Modifiers in relations with cTAKES ones
+// List<BinaryTextRelation> relations = new ArrayList<BinaryTextRelation>();
+// relations.addAll(JCasUtil.select(goldView, BinaryTextRelation.class));
+// for (BinaryTextRelation relation : relations)
+// {
+//
+// // attempt to replace the gold RelationArguments with system ones
+// int replacedArgumentCount = 0;
+// for (RelationArgument relArg : Arrays.asList(relation.getArg1(),
+// relation.getArg2()))
+// {
+// Annotation goldArg = relArg.getArgument();
+// Class<? extends Annotation> argClass = goldArg.getClass();
+//
+// // find all annotations covered by the gold argument and of the same
+// // class (these should
+// // be the ones copied over from the cTAKES output earlier)
+// List<? extends Annotation> systemArgs = JCasUtil.selectCovered(
+// goldView, argClass, goldArg);
+//
+// // no ctakes annotation found
+// if (systemArgs.size() == 0)
+// {
+// String word = "no";
+// String className = argClass.getSimpleName();
+// String argText = goldArg.getCoveredText();
+// String message = String.format("%s %s for \"%s\"", word, className,
+// argText);
+// this.getContext().getLogger().log(Level.FINE, message);
+// continue;
+// }
+//
+// // if there's exactly one annotation, replace the gold one with that
+// if (systemArgs.size() == 1)
+// {
+// relArg.setArgument(systemArgs.get(0));
+// replacedArgumentCount += 1;
+// }
+//
+// else
+// {
+// // multiple ctakes arguments found; look for one that matches
+// // exactly
+// // e.g. gold: "right breast", ctakes: "right breast", "breast"
+// for (Annotation systemArg : systemArgs)
+// {
+// String goldArgText = goldArg.getCoveredText();
+// String systemArgText = systemArg.getCoveredText();
+// if (systemArgText.equals(goldArgText))
+// {
+// relArg.setArgument(systemArg);
+// replacedArgumentCount += 1;
+// }
+// }
+//
+// if (replacedArgumentCount < 1)
+// {
+// // issue a warning message
+// String word = "multiple";
+// String className = argClass.getSimpleName();
+// String argText = goldArg.getCoveredText();
+// String message = String.format("%s %s for \"%s\"", word,
+// className, argText);
+// this.getContext().getLogger().log(Level.FINE, message);
+//
+// System.out.println("gold argument: " + goldArg.getCoveredText());
+// System.out.println("gold type: "
+// + ((IdentifiedAnnotation) goldArg).getTypeID());
+// for (Annotation systemArg : systemArgs)
+// {
+// System.out.println("ctakes argument: "
+// + systemArg.getCoveredText());
+// System.out.println("ctakes type: "
+// + ((IdentifiedAnnotation) systemArg).getTypeID());
+// }
+// System.out.println();
+// }
+// }
+// }
+//
+// // if replacements were not found for both arguments, remove the
+// // relation
+// if (replacedArgumentCount < 2)
+// {
+// relation.removeFromIndexes();
+// }
+// }
+// }
+// }
/**
* Class that copies the manual {@link Modifier} annotations to the default CAS.
Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1411758&r1=1411757&r2=1411758&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Tue Nov 20 17:44:30 2012
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -66,7 +66,7 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
-public class AssertionCleartkAnalysisEngine extends
+public abstract class AssertionCleartkAnalysisEngine extends
CleartkAnnotator<String>
{
Logger logger = Logger.getLogger(AssertionCleartkAnalysisEngine.class);
@@ -170,6 +170,7 @@ public class AssertionCleartkAnalysisEng
}
+ public abstract void setClassLabel(IdentifiedAnnotation entityMention, Instance<String> instance) throws AnalysisEngineProcessException;
@Override
@@ -271,32 +272,7 @@ public class AssertionCleartkAnalysisEng
instance.addAll(extractor.extract(identifiedAnnotationView, entityMention));
}
- if (this.isTraining())
- {
- String polarity = (entityMention.getPolarity() == -1) ? "negated" : "present";
- instance.setOutcome(polarity);
- if ("negated".equals(polarity))
- {
- logger.info("TRAINING: " + polarity);
- }
- this.dataWriter.write(instance);
- } else
- {
- String label = this.classifier.classify(instance.getFeatures());
- int polarity = 1;
- if (label!= null && label.equals("present"))
- {
- polarity = 0;
- } else if (label != null && label.equals("negated"))
- {
- polarity = -1;
- }
- entityMention.setPolarity(polarity);
- if ("negated".equals(label))
- {
- logger.info(String.format("DECODING/EVAL: %s//%s [%d-%d] (%s)", label, polarity, entityMention.getBegin(), entityMention.getEnd(), entityMention.getClass().getName()));
- }
- }
+ setClassLabel(entityMention, instance);
}
Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.cleartk.classifier.Instance;
+
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+
+ /**
+  * Assertion annotator for the "conditional" attribute of an
+  * {@link IdentifiedAnnotation}.
+  */
+ public class ConditionalCleartkAnalysisEngine extends AssertionCleartkAnalysisEngine {
+
+   /**
+    * Either records a training instance or applies the classifier, depending on
+    * {@code isTraining()}.
+    *
+    * @param entityMention the mention whose conditional flag is read (training)
+    *                      or written (classification)
+    * @param instance      the instance holding the features extracted for the mention
+    * @throws AnalysisEngineProcessException declared by the overridden method
+    */
+   @Override
+   public void setClassLabel(IdentifiedAnnotation entityMention,
+       Instance<String> instance) throws AnalysisEngineProcessException {
+     if (!this.isTraining()) {
+       // Any label other than "conditional" (including null) leaves the flag false.
+       String predicted = this.classifier.classify(instance.getFeatures());
+       entityMention.setConditional("conditional".equals(predicted));
+       return;
+     }
+
+     // Training: turn the boolean flag into the outcome label and write the instance.
+     String outcome;
+     if (entityMention.getConditional()) {
+       outcome = "conditional";
+     } else {
+       outcome = "nonconditional";
+     }
+     instance.setOutcome(outcome);
+     this.dataWriter.write(instance);
+   }
+ }
Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.cleartk.classifier.Instance;
+
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+
+ /**
+  * Assertion annotator for the "generic" attribute of an
+  * {@link IdentifiedAnnotation}.
+  *
+  * NOTE(review): assumes IdentifiedAnnotation exposes boolean
+  * getGeneric()/setGeneric(boolean) accessors, analogous to the conditional
+  * attribute -- confirm against the type system.
+  */
+ public class GenericCleartkAnalysisEngine extends AssertionCleartkAnalysisEngine {
+
+   /**
+    * Either records a training instance or applies the classifier, depending on
+    * {@code isTraining()}.
+    *
+    * @param entityMention the mention whose generic flag is read (training)
+    *                      or written (classification)
+    * @param instance      the instance holding the features extracted for the mention
+    * @throws AnalysisEngineProcessException declared by the overridden method
+    */
+   @Override
+   public void setClassLabel(IdentifiedAnnotation entityMention,
+       Instance<String> instance) throws AnalysisEngineProcessException {
+     if (this.isTraining()) {
+       // Use the generic flag as the outcome. Reading the subject attribute here
+       // would train this model on subject values instead of genericity.
+       String generic = entityMention.getGeneric() ? "generic" : "nongeneric";
+       instance.setOutcome(generic);
+       this.dataWriter.write(instance);
+     } else {
+       // Write the prediction to the generic flag only, so the mention's subject
+       // attribute is never overwritten by this engine. Any label other than
+       // "generic" (including null) leaves the flag false.
+       String label = this.classifier.classify(instance.getFeatures());
+       entityMention.setGeneric("generic".equals(label));
+     }
+   }
+
+ }