You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ma...@apache.org on 2012/11/20 18:44:33 UTC

svn commit: r1411758 [1/2] - in /incubator/ctakes/trunk/ctakes-assertion: .settings/ resources/launch/ src/main/java/org/apache/ctakes/assertion/cr/ src/main/java/org/apache/ctakes/assertion/eval/ src/main/java/org/apache/ctakes/assertion/medfacts/clea...

Author: mattcoarr
Date: Tue Nov 20 17:44:30 2012
New Revision: 1411758

URL: http://svn.apache.org/viewvc?rev=1411758&view=rev
Log:
more work on refactoring assertion module to use cleartk and uimafit.  this includes separate analysis engines for each attribute

Added:
    incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs
    incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch
    incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/CtakesFileNamer.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipeline.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/relationextractor/
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/relationextractor/cr/
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/relationextractor/cr/Mapper.java
Modified:
    incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java

Modified: incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs?rev=1411758&r1=1411757&r2=1411758&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs (original)
+++ incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.core.resources.prefs Tue Nov 20 17:44:30 2012
@@ -1,5 +1,4 @@
 eclipse.preferences.version=1
 encoding//src/main/java=UTF-8
 encoding//src/main/resources=UTF-8
-encoding//target/generated-sources/jcasgen=UTF-8
 encoding/<project>=UTF-8

Added: incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs (added)
+++ incubator/ctakes/trunk/ctakes-assertion/.settings/org.eclipse.m2e.core.prefs Tue Nov 20 17:44:30 2012
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1

Added: incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch (added)
+++ incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch Tue Nov 20 17:44:30 2012
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.assertion.pipelines.GoldEntityAndAttributeReaderPipelineForSeedCorpus"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/Mayo/UMLS_CEM"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-assertion"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+</launchConfiguration>

Added: incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch (added)
+++ incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch Tue Nov 20 17:44:30 2012
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<stringAttribute key="bad_container_name" value="/ctakes-assertion/resour"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<stringAttribute key="org.eclipse.debug.ui.ATTR_CAPTURE_IN_FILE" value="/tmp/assertion.log"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.assertion.eval.AssertionEvalBasedOnModifier"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="--train-dir /sharp_data/train --test-dir /sharp_data/test --models-dir /sharp_data/model/eval.model"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-assertion"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+</launchConfiguration>

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/ArgumentInfo.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.cr;
+
+/**
+ * Holds one argument of a relation as read from a knowtator xml file.
+ *
+ * The xml fragment an instance is built from typically looks like this:
+ *
+ *   <complexSlotMention id="Relations_Sept21_Schema_Set02_Instance_90018">
+ *   <mentionSlot id="Related_to" />
+ *   <complexSlotMentionValue value="Relations_Sept21_Schema_Instance_30350" />
+ *   </complexSlotMention>
+ *
+ * The parsed value and role are kept as plain public fields.
+ * @author dmitriy dligach
+ */
+public class ArgumentInfo {
+  /** Contents of the "value" attribute in the fragment above. */
+  public String value;
+
+  /** Role of the argument, e.g. "Related_to". */
+  public String role;
+
+  ArgumentInfo(String argumentValue, String argumentRole) {
+    this.role = argumentRole;
+    this.value = argumentValue;
+  }
+}

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.assertion.cr;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.ctakes.relationextractor.cr.Mapper;
+import org.jdom.Document;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+
+/**
+ * Read named entity annotations from knowtator xml files into the CAS
+ * 
+ * @author stephen wu
+ *
+ */
+public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
+
+	// name of the configuration parameter that should contain the path to knowtator xml files
+	public static final String PARAM_INPUTDIR = "InputDirectory";
+	// path to knowtator xml files; static, so the value set in initialize() is shared by all instances
+	public static String inputDirectory;
+	// counter for assigning entity ids; reset in initialize() and incremented per mention in process()
+	public int identifiedAnnotationId;
+	private boolean VERBOSE = true; // only referenced from commented-out warning code in this class
+	
+	@Override
+	public void initialize(UimaContext aContext) throws ResourceInitializationException {
+		super.initialize(aContext); // base class initialization comes first
+		Object configuredDirectory = aContext.getConfigParameterValue(PARAM_INPUTDIR);
+		GoldEntityAndAttributeReader.inputDirectory = (String) configuredDirectory; // static field
+		this.identifiedAnnotationId = 0; // entity ids restart at zero
+	}
+
+	/**
+	 * Reads the gold knowtator annotations for the current document and adds one
+	 * annotation per named entity mention, with its attributes, to the default view.
+	 *
+	 * The file is looked up as DOCUMENT_ID.knowtator.xml inside the configured input
+	 * directory. Mentions whose knowtator type is not a named entity type are skipped;
+	 * for a mention with several spans the annotation runs from the start of the first
+	 * listed span to the end of the last listed one.
+	 *
+	 * @param jCas CAS of the document; its document id selects the knowtator xml file
+	 * @throws AnalysisEngineProcessException if the default view cannot be obtained
+	 *         or the knowtator file cannot be read or parsed
+	 */
+	@Override
+	public void process(JCas jCas) throws AnalysisEngineProcessException {
+
+		JCas initView;
+		try {
+			initView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+		} catch (CASException e) {
+			throw new AnalysisEngineProcessException(e);
+		}
+		// resolve the file name against the directory instead of concatenating strings,
+		// so the input directory works with or without a trailing separator
+		String goldFileName = DocumentIDAnnotationUtil.getDocumentID(jCas) + ".knowtator.xml";
+		File goldFile = (inputDirectory == null || inputDirectory.isEmpty())
+				? new File(goldFileName) : new File(inputDirectory, goldFileName);
+
+		SAXBuilder builder = new SAXBuilder();
+		Document document;
+		try {
+			document = builder.build(goldFile);
+		} catch (JDOMException e) {
+			throw new AnalysisEngineProcessException(e);
+		} catch (Exception e) { // TODO this should be IOException, but the command-line maven build was breaking
+			throw new AnalysisEngineProcessException(e);
+		}
+
+		// map knowtator mention ids to entity offsets
+		HashMap<String, ArrayList<Span>> allMentions = XMLReader.getEntityMentions(document);
+		// map knowtator mention ids to entity types
+		HashMap<String, String> entityTypes = XMLReader.getEntityTypes(document);
+		// map knowtator mention ids to the ids of mention-attributes (or attributes themselves)
+		HashMap<String,List<String>> mentionAttr = XMLReader.getEntityAttributes(document);
+		// map knowtator mention-attribute ids to attributes
+		String[] complexSlotMention = {"complexSlotMention"};
+		HashMap<String, ArgumentInfo> attrPtr = XMLReader.getAttributes(document,complexSlotMention);
+		// map knowtator attribute ids to role-value pairs
+		HashMap<String, ArgumentInfo> attrs = XMLReader.getAttributes(document);
+
+		// pare down hashmap based on types -- keep only NEs
+		HashMap<String, ArrayList<Span>> neMentions = filterToNamedEntitiesOnly(allMentions,entityTypes);
+
+		// one CAS annotation per remaining mention
+		for(Map.Entry<String, ArrayList<Span>> mention : neMentions.entrySet()) {
+			String mentionId = mention.getKey();
+			ArrayList<Span> spans = mention.getValue();
+
+			// for disjoint spans, just ignore the gap
+			Span first = spans.get(0);
+			Span last = spans.get(spans.size() - 1);
+
+			// put entity and attributes into the CAS
+			// choose either entity or event
+			// anatomical sites become EntityMentions, the other listed types EventMentions
+			IdentifiedAnnotation eMention;
+			int type = Mapper.getEntityTypeId(entityTypes.get(mentionId));
+			if (type==CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
+				eMention = new EntityMention(initView, first.start, last.end);
+			} else if (type==CONST.NE_TYPE_ID_DISORDER
+					|| type==CONST.NE_TYPE_ID_DRUG
+					|| type==CONST.NE_TYPE_ID_FINDING
+					|| type==CONST.NE_TYPE_ID_PROCEDURE
+					) {
+				eMention = new EventMention(initView, first.start, last.end);
+			} else {
+				// any other type id falls back to the generic annotation
+				eMention = new IdentifiedAnnotation(initView, first.start, last.end);
+			}
+
+			// set easy attributes
+			eMention.setTypeID(type);
+			eMention.setId(identifiedAnnotationId++);
+			eMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
+			eMention.setConfidence(1);
+
+			// set harder attributes: follow the attribute ids attached to this mention
+			// down to their role/value pairs
+			List<ArgumentInfo> assocAttributes = getLeafAttributes(mentionId,
+					mentionAttr,attrPtr,attrs,new ArrayList<ArgumentInfo>());
+			for (ArgumentInfo a : assocAttributes) {
+				// look at the role and set the matching value on the annotation
+				checkForAttrValue(eMention, a.role, a.value);
+			}
+
+			// add to CAS
+			eMention.addToIndexes();
+		}
+	}
+
+	/**
+	 * Collects into output the leaf role/value attributes reachable from id: a mention id
+	 * (key of mentionAttr) is expanded into its attribute ids, a pointer attribute (key of
+	 * attrPtr) is followed to the id stored in its value, and a plain attribute (key of
+	 * attrs) is appended. Relation arguments (Related_to / Argument, with or without the
+	 * _CU suffix) are skipped at every level. Returns the output list that was passed in.
+	 */
+	private List<ArgumentInfo> getLeafAttributes(String id,
+			HashMap<String, List<String>> mentionAttr,
+			HashMap<String, ArgumentInfo> attrPtr, HashMap<String, ArgumentInfo> attrs, List<ArgumentInfo> output) {
+		if (mentionAttr.containsKey(id)) {
+			for (String attrId : mentionAttr.get(id)) {
+				if (attrPtr.containsKey(attrId)) {
+					ArgumentInfo pointer = attrPtr.get(attrId);
+					if (!isRelationArgument(pointer.role)) {
+						getLeafAttributes(pointer.value, mentionAttr, attrPtr, attrs, output);
+					}
+				} else if (attrs.containsKey(attrId)) {
+					ArgumentInfo leaf = attrs.get(attrId);
+					if (!isRelationArgument(leaf.role)) {
+						output.add(leaf);
+					}
+				}
+			}
+		} else if (attrPtr.containsKey(id)) {
+			ArgumentInfo pointer = attrPtr.get(id);
+			if (!isRelationArgument(pointer.role)) {
+				getLeafAttributes(pointer.value, mentionAttr, attrPtr, attrs, output);
+			}
+		} else if (attrs.containsKey(id)) {
+			ArgumentInfo leaf = attrs.get(id);
+			if (!isRelationArgument(leaf.role)) {
+				output.add(leaf);
+			}
+		}
+		return output;
+	}
+
+	/**
+	 * Tells whether the role, once normalized, is "Related_to" or "Argument".
+	 */
+	private boolean isRelationArgument(String role) {
+		String normalized = normalizeRoleName(role);
+		boolean relatedTo = normalized.equals("Related_to");
+		return relatedTo || normalized.equals("Argument");
+	}
+
+	/**
+	 * Copies one knowtator attribute onto the annotation. Only roles containing
+	 * "_normalization" are considered; the start of the role picks the feature
+	 * that is set. Roles matching none of the known prefixes are ignored.
+	 */
+	private void checkForAttrValue(IdentifiedAnnotation eMention, String role,
+			String value) {
+		if (!role.contains("_normalization")) {
+			return;
+		}
+		if (role.startsWith("conditional")) {
+			eMention.setConditional(Boolean.valueOf(value));
+		} else if (role.startsWith("generic")) {
+			eMention.setGeneric(Boolean.valueOf(value));
+		} else if (role.startsWith("negation_indicator")) {
+			// assumes that the string from Knowtator is exactly "negation_present"
+			eMention.setPolarity(value.equals("negation_present")
+					? CONST.NE_POLARITY_NEGATION_PRESENT
+					: CONST.NE_POLARITY_NEGATION_ABSENT);
+		} else if (role.startsWith("subject")) {
+			// assumes that the strings from Knowtator are exactly what's in the type system
+			eMention.setSubject(value);
+		} else if (role.startsWith("uncertainty_indicator")) {
+			// assumes that the string from Knowtator is exactly "indicator_present"
+			eMention.setUncertainty(value.equals("indicator_present")
+					? CONST.NE_UNCERTAINTY_PRESENT
+					: CONST.NE_UNCERTAINTY_ABSENT);
+		}
+	}
+
+	// Keeps only the mentions whose Knowtator schema type is one of the named entity
+	//   types; every other mention is left out of the returned map.
+	private HashMap<String, ArrayList<Span>> filterToNamedEntitiesOnly(
+			HashMap<String, ArrayList<Span>> entityMentions,
+			HashMap<String, String> entityTypes) {
+		HashMap<String, ArrayList<Span>> kept = new HashMap<String, ArrayList<Span>>();
+		for (Map.Entry<String, String> idAndType : entityTypes.entrySet()) {
+			String schemaType = idAndType.getValue();
+			boolean namedEntity = schemaType.equals("Anatomical_site")
+					|| schemaType.equals("Disease_Disorder")
+					|| schemaType.equals("Lab")
+					|| schemaType.equals("Medications")
+					|| schemaType.equals("Procedure")
+					|| schemaType.equals("Sign_symptom");
+			String mentionId = idAndType.getKey();
+			// ids that have a type but no entry in entityMentions contribute nothing
+			if (!namedEntity || !entityMentions.containsKey(mentionId)) {
+				continue;
+			}
+			kept.put(mentionId, entityMentions.get(mentionId));
+		}
+		return kept;
+	}
+	
+	/**
+	 * Tells whether a single mention is a named entity: its Knowtator schema type
+	 * (typeValue) must be one of the NE types, compared ignoring case, and the mention
+	 * id (typeKey) must be a key of entityMentions. A null typeValue yields false.
+	 */
+	private boolean filterToNamedEntitiesOnly(
+			HashMap<String, ArrayList<Span>> entityMentions,
+			String typeKey, String typeValue) {
+
+		// case-insensitive match; lower-casing the value and comparing it with these
+		// mixed-case names via equals() could never succeed
+		boolean isNamedEntityType = "Anatomical_site".equalsIgnoreCase(typeValue)
+				|| "Disease_Disorder".equalsIgnoreCase(typeValue)
+				|| "Lab".equalsIgnoreCase(typeValue)
+				|| "Medications".equalsIgnoreCase(typeValue)
+				|| "Procedure".equalsIgnoreCase(typeValue)
+				|| "Sign_symptom".equalsIgnoreCase(typeValue);
+
+		return isNamedEntityType && entityMentions.containsKey(typeKey);
+	}
+		
+	/**
+	 * Maps the suffixed role names Argument_CU and Related_to_CU to their
+	 * plain forms Argument and Related_to; any other role is returned as is.
+	 * This will not be necessary in the future when the data will be
+	 * post-processed to remove _CU suffixes.
+	 *
+	 * Currently mipacq data does not have the suffixes and sharp data does.
+	 */
+	private static String normalizeRoleName(String role) {
+		String plainName;
+		if (role.equals("Argument_CU")) {
+			plainName = "Argument";
+		} else if (role.equals("Related_to_CU")) {
+			plainName = "Related_to";
+		} else {
+			plainName = role;
+		}
+
+		return plainName;
+	}
+}
\ No newline at end of file

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/RelationInfo.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.cr;
+
+import java.util.HashSet;
+
+import com.google.common.base.Objects;
+
+/**
+ * Information about a relation that includes the info about the relation's arguments.
+ * Two instances are equal when both argument ids match; roles and category are ignored.
+ *
+ * @author dmitriy dligach
+ */
+public class RelationInfo {
+
+  public String id1;       // id of the first argument
+  public String id2;       // id of the second argument
+  public String role1;     // position of first arg (e.g. Argument)
+  public String role2;     // semantic type of second arg (e.g. Related_to)
+  public String category;  // relation type e.g. co_occurs_with
+
+  RelationInfo(String id1, String id2, String role1, String role2, String category) {
+    this.id1 = id1;
+    this.id2 = id2;
+    this.role1 = role1;
+    this.role2 = role2;
+    this.category = category;
+  }
+
+  @Override
+  public String toString() {
+    return String.format("<%s, %s, %s, %s, %s>", id1, id2, role1, role2, category);
+  }
+
+  /**
+   * Returns true if two relation instances (represented as RelationInfo objects)
+   * have the same arguments. Useful for debugging mipacq data which may contain
+   * duplicate relation instances. Null and objects of another class are never equal.
+   */
+  @Override
+  public boolean equals(Object object) {
+    if (this == object) {
+      return true;
+    }
+    // object.getClass() on a null argument would throw, so rule null out first
+    if (object == null || this.getClass() != object.getClass()) {
+      return false;
+    }
+    RelationInfo relationInfo = (RelationInfo) object;
+    return this.id1.equals(relationInfo.id1) && this.id2.equals(relationInfo.id2);
+  }
+
+  /*
+   * Hash code must match equals() method.
+   */
+  @Override
+  public int hashCode() {
+    return Objects.hashCode(this.id1, this.id2);
+  }
+
+  public static void main(String[] args) {
+
+    RelationInfo ri1 = new RelationInfo("1", "2", "Argument", "Related_to", "location_of");
+    RelationInfo ri2 = new RelationInfo("1", "2", "zzzzzzzz", "xxxxxxxxxx", "yyyyyyyyyyy");
+    RelationInfo ri3 = new RelationInfo("1", "2", "kkkkkkkk", "llllllllll", "mmmmmmmmmmm");
+
+    System.out.println(ri1.equals(ri2));
+
+    HashSet<RelationInfo> uniqueRelations = new HashSet<RelationInfo>();
+
+    System.out.println(ri1.hashCode() + "\t" + ri2.hashCode());
+
+    uniqueRelations.add(ri1);
+    uniqueRelations.add(ri2);
+
+    System.out.println(uniqueRelations);
+
+    System.out.println(uniqueRelations.contains(ri1));
+    System.out.println(uniqueRelations.contains(ri2));
+    System.out.println(uniqueRelations.contains(ri3));
+  }
+}
\ No newline at end of file

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/Span.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.cr;
+
+
+/**
+ * Represents span of a named entity.
+ * Equality and hash code are both based on start and end.
+ *
+ * @author dmitriy dligach
+ *
+ */
+public class Span {
+	
+	public int start;
+	public int end;
+	
+	public Span(int start, int end) {
+		this.start = start;
+		this.end = end;
+	}
+	
+	/** Two spans are equal when both their start and their end match. */
+	@Override
+	public boolean equals(Object object) {
+		if (!(object instanceof Span)) {
+			return false; // also covers null
+		}
+		Span span = (Span) object;
+		return (this.start == span.start) && (this.end == span.end);
+	}
+	
+	/** Hash code consistent with equals(): equal spans always hash alike. */
+	@Override
+	public int hashCode() {
+		return 31 * start + end;
+	}
+	
+	/** Formats the span as "start -- end". */
+	@Override
+	public String toString() {
+		return String.format("%d -- %d", start, end);
+	}
+}
\ No newline at end of file

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XMLReader.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.cr;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+
+import org.jdom.Document;
+import org.jdom.Element;
+
+public class XMLReader {
+
+	static String[] attrHeadings = {"booleanSlotMention","stringSlotMention"}; // default headings used by getAttributes(Document)
+	private static boolean VERBOSE = true; // when true, getAttributes reports repeated slot-mention ids on stderr
+	
+	/**
+	 * Get spans of named entity annotations indexed on knowtator mention id
+	 */
+  public static HashMap<String, ArrayList<Span>> getEntityMentions(Document document) {
+
+    // key: mention id, value: all spans of that mention
+    // (a list, because a mention may be made of several disjoint spans)
+    HashMap<String, ArrayList<Span>> spansByMentionId = new HashMap<String, ArrayList<Span>>();
+
+    for (Object annotationObject : document.getRootElement().getChildren("annotation")) {
+      Element annotation = (Element) annotationObject;
+      List<?> spanElements = annotation.getChildren("span");
+
+      // A spanless annotation (e.g. a relation) has no text to point at; skip it
+      // before its <mention> child is ever looked up.
+      if (spanElements.isEmpty()) {
+        continue;
+      }
+
+      // Convert every <span start=".." end=".."/> child into a Span. Both attributes
+      // go through Integer.parseInt, so a missing or non-numeric offset raises
+      // NumberFormatException.
+      ArrayList<Span> mentionSpans = new ArrayList<Span>();
+      for (Object spanObject : spanElements) {
+        Element spanElement = (Element) spanObject;
+        int spanStart = Integer.parseInt(spanElement.getAttributeValue("start"));
+        int spanEnd = Integer.parseInt(spanElement.getAttributeValue("end"));
+        mentionSpans.add(new Span(spanStart, spanEnd));
+      }
+
+      // The id of the annotation's <mention> child is the key; a later annotation
+      // with the same mention id replaces the spans stored for an earlier one.
+      String mentionId = annotation.getChild("mention").getAttributeValue("id");
+      spansByMentionId.put(mentionId, mentionSpans);
+    }
+    return spansByMentionId;
+  }
+
+  /**
+   * Type of each named entity indexed on mention ids
+   */
+  public static HashMap<String, String> getEntityTypes(Document document) {
+    // Maps each classMention id to the text of its <mentionClass> child,
+    // i.e. the semantic type of the entity (e.g. "sign_symptom").
+    HashMap<String, String> typeByMentionId = new HashMap<String, String>();
+
+    for (Object child : document.getRootElement().getChildren("classMention")) {
+      Element mention = (Element) child;
+      // getChildText yields null when the mention has no <mentionClass> element,
+      // in which case the id is still recorded with a null type.
+      typeByMentionId.put(
+          mention.getAttributeValue("id"),
+          mention.getChildText("mentionClass"));
+    }
+
+    return typeByMentionId;
+  }
+  
+  /**
+   * Attribute mention IDs (hasSlotMention ids) of every classMention, indexed on mention ids.
+   * No filtering by entity type happens here, so mentions that are not named entities are included.
+   */
+  public static HashMap<String, List<String>> getEntityAttributes(Document document) {
+
+    // key: classMention id, value: ids of that mention's hasSlotMention children
+    HashMap<String, List<String>> slotIdsByMention = new HashMap<String, List<String>>();
+
+    // Every classMention under the root gets an entry; nothing is filtered out here.
+    List<?> mentions = document.getRootElement().getChildren("classMention");
+    for (Object mentionObject : mentions) {
+      Element mention = (Element) mentionObject;
+
+      // Gather the id attribute of each <hasSlotMention> child.
+      List<String> slotIds = new ArrayList<String>();
+      for (Object slotObject : mention.getChildren("hasSlotMention")) {
+        Element slotReference = (Element) slotObject;
+        slotIds.add(slotReference.getAttributeValue("id"));
+      }
+
+      // A mention without slot children is still recorded, with an empty list.
+      String mentionId = mention.getAttributeValue("id");
+      slotIdsByMention.put(mentionId, slotIds);
+    }
+
+    return slotIdsByMention;
+  }
+  
+  /**
+   * Attributes of each named entity indexed on mention ids. For now, manually handles different types of Slots
+   */
+  public static HashMap<String, ArgumentInfo> getAttributes(Document document) {
+    return XMLReader.getAttributes(document, XMLReader.attrHeadings); // default slot headings
+  }
+  
+  /**
+   * Attributes of each named entity indexed on mention ids. For now, manually handles different types of Slots
+   */
+  public static HashMap<String, ArgumentInfo> getAttributes(Document document, String[] headings) {
+
+    // Result map. Key: slot mention id; value: the slot's value together with its
+    // normalized slot name. Ids are shared across all headings, so an id that
+    // occurs under two headings counts as repeated.
+    HashMap<String, ArgumentInfo> attributeById = new HashMap<String, ArgumentInfo>();
+
+    Element root = document.getRootElement();
+
+    // Each heading names one kind of slot mention element directly under the root.
+    // Judging by the lookups below, such an element is expected to look like this
+    // (HEADING stands for the heading string itself):
+    //
+    //   <HEADING id="...">
+    //     <mentionSlot id="..."/>
+    //     <HEADINGValue value="..."/>
+    //   </HEADING>
+    //
+    // An element lacking either child makes getChild return null, and the chained
+    // getAttributeValue call then fails with a NullPointerException.
+    for (String heading : headings) {
+      String valueElementName = heading + "Value";
+
+      for (Object slotObject : root.getChildren(heading)) {
+        Element slotMention = (Element) slotObject;
+
+        String id = slotMention.getAttributeValue("id");
+        String value = slotMention.getChild(valueElementName).getAttributeValue("value");
+        // name of the slot this mention fills, e.g. "Related_to"
+        String slotName = slotMention.getChild("mentionSlot").getAttributeValue("id");
+
+        // A repeated id is only reported (when VERBOSE is set); in either case the
+        // entry read last replaces the one stored before.
+        if (VERBOSE && attributeById.containsKey(id)) {
+          System.err.println("WARNING: found more than one attribute in an attribute mention");
+        }
+        attributeById.put(id, new ArgumentInfo(value, normalizeName(slotName)));
+      }
+    }
+
+    return attributeById;
+  }
+  
+  public static ArrayList<RelationInfo> getRelations(Document document) {
+
+    Element root = document.getRootElement();
+
+    // Pass 1: index every complexSlotMention by id. Each entry keeps the slot's
+    // value and the normalized role it names (e.g. "Related_to").
+    HashMap<String, ArgumentInfo> argumentBySlotId = new HashMap<String, ArgumentInfo>();
+    for (Object slotObject : root.getChildren("complexSlotMention")) {
+      Element slot = (Element) slotObject;
+
+      String slotId = slot.getAttributeValue("id");
+      String slotValue = slot.getChild("complexSlotMentionValue").getAttributeValue("value");
+      String roleName = slot.getChild("mentionSlot").getAttributeValue("id");
+
+      argumentBySlotId.put(slotId, new ArgumentInfo(slotValue, normalizeName(roleName)));
+    }
+
+    // Pass 2: a classMention with at least two hasSlotMention children is a relation
+    // candidate, and its mentionClass text is the relation type. addRelation then
+    // decides whether the candidate really has exactly two arguments.
+    ArrayList<RelationInfo> relations = new ArrayList<RelationInfo>();
+    for (Object mentionObject : root.getChildren("classMention")) {
+      Element mention = (Element) mentionObject;
+      List<?> slotReferences = mention.getChildren("hasSlotMention");
+      if (slotReferences.size() < 2) {
+        continue;
+      }
+
+      String relationType = mention.getChildText("mentionClass");
+      addRelation(relations, slotReferences, argumentBySlotId, relationType);
+    }
+
+    return relations;
+  }
+
+  private static void addRelation(ArrayList<RelationInfo> relations, List<?> hasSlotMentions, 
+  		HashMap<String, ArgumentInfo> hashComplexSlotMentions, String relationType) {
+
+    // Pick out the slots that are real relation arguments. A relation may list
+    // more hasSlotMention(s) than it has arguments: ids missing from the
+    // complexSlotMention index are ignored, and the role is checked explicitly
+    // because in sharp data (unlike in mipacq) one of the slots can be a negation
+    // attribute with a span.
+    ArrayList<ArgumentInfo> arguments = new ArrayList<ArgumentInfo>();
+    for (Object slotReference : hasSlotMentions) {
+      String slotId = ((Element) slotReference).getAttributeValue("id");
+      if (!hashComplexSlotMentions.containsKey(slotId)) {
+        continue;
+      }
+
+      ArgumentInfo candidate = hashComplexSlotMentions.get(slotId);
+      if (candidate.role.equals("Argument") || candidate.role.equals("Related_to")) {
+        arguments.add(candidate);
+      }
+    }
+
+    // Exactly two arguments are allowed; any other count adds nothing to the list.
+    if (arguments.size() == 2) {
+      ArgumentInfo first = arguments.get(0);
+      ArgumentInfo second = arguments.get(1);
+
+      // value: id of the mention filling the argument; role: e.g. Argument / Related_to
+      relations.add(
+          new RelationInfo(first.value, second.value, first.role, second.role, relationType));
+    }
+  }
+  
+  /**
+   * Convert Argument_CU and Related_to_CU to Argument and Related_to.
+   * This will not be necessary in the future when the data will be 
+   * post-processed to remove _CU suffixes. 
+   * 
+   * Currently mipacq data does not have the suffixes and sharp data does.
+   */
+  private static String normalizeName(String role) {
+
+    // Only the two _CU-suffixed names are rewritten; any other name is returned
+    // unchanged. A null role fails on the first equals() call.
+    boolean isSuffixedArgument = role.equals("Argument_CU");
+    boolean isSuffixedRelatedTo = role.equals("Related_to_CU");
+
+    if (isSuffixedArgument) {
+      return "Argument";
+    }
+
+    return isSuffixedRelatedTo ? "Related_to" : role;
+  }
+}

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/XmiCollectionReaderCtakes.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.assertion.cr;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.collection.CollectionReader_ImplBase;
+import org.apache.uima.resource.ResourceConfigurationException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import org.xml.sax.SAXException;
+
+/**
+ * A simple collection reader that reads CASes in XMI format from a directory in the filesystem.
+ */
+public class XmiCollectionReaderCtakes extends CollectionReader_ImplBase {
+  /**
+   * Name of configuration parameter that must be set to the path of a directory containing the XMI
+   * files.
+   */
+  public static final String PARAM_INPUTDIR = "InputDirectory";
+  
+  /**
+   * Name of the configuration parameter that must be set to indicate if the
+   * execution fails if an encountered type is unknown
+   */
+  public static final String PARAM_FAILUNKNOWN = "FailOnUnknownType";
+
+  private Boolean mFailOnUnknownType; // an unset parameter is replaced by true in initialize()
+  /** The .xmi files collected by initialize(), in the order the directory listing returned them. */
+  private ArrayList<File> mFiles;
+  /** Index into mFiles of the file the next getNext() call will read. */
+  private int mCurrentIndex;
+
+  /**
+   * @see org.apache.uima.collection.CollectionReader_ImplBase#initialize()
+   */
+  public void initialize() throws ResourceInitializationException {
+    // FailOnUnknownType is optional; when it is not configured, default to true.
+    mFailOnUnknownType = (Boolean) getConfigParameterValue(PARAM_FAILUNKNOWN);
+    if (null == mFailOnUnknownType) {
+      mFailOnUnknownType = true;
+    }
+    File directory = new File(((String) getConfigParameterValue(PARAM_INPUTDIR)).trim());
+    mCurrentIndex = 0;
+    // listFiles() returns null for a non-directory and for an unreadable directory;
+    // report both as "not found" rather than failing later with a NullPointerException.
+    File[] files = directory.isDirectory() ? directory.listFiles() : null;
+    if (files == null) {
+      throw new ResourceInitializationException(ResourceConfigurationException.DIRECTORY_NOT_FOUND,
+              new Object[] { PARAM_INPUTDIR, this.getMetaData().getName(), directory.getPath() });
+    }
+    // keep only the plain .xmi files; subdirectories are not descended into
+    mFiles = new ArrayList<File>();
+    for (File file : files) {
+      if (!file.isDirectory() && file.getName().endsWith(".xmi")) {
+        mFiles.add(file);
+      }
+    }
+  }
+
+  /**
+   * @see org.apache.uima.collection.CollectionReader#hasNext()
+   */
+  public boolean hasNext() {
+    return mFiles.size() > mCurrentIndex; // true while unread files remain
+  }
+
+  /**
+   * @see org.apache.uima.collection.CollectionReader#getNext(org.apache.uima.cas.CAS)
+   */
+  public void getNext(CAS aCAS) throws IOException, CollectionException {
+    File xmiFile = (File) mFiles.get(mCurrentIndex++); // index advances even if the read fails
+    FileInputStream xmiStream = new FileInputStream(xmiFile);
+    try {
+      XmiCasDeserializer.deserialize(xmiStream, aCAS, !mFailOnUnknownType); // 3rd arg: lenient
+    } catch (SAXException parseFailure) {
+      throw new CollectionException(parseFailure);
+    } finally {
+      xmiStream.close();
+    }
+  }
+
+  /**
+   * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#close()
+   */
+  public void close() throws IOException {
+  } // intentionally empty: getNext() closes each file stream itself
+
+  /**
+   * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#getProgress()
+   */
+  public Progress[] getProgress() {
+    return new Progress[] { new ProgressImpl(mCurrentIndex, mFiles.size(), Progress.ENTITIES) }; // files consumed so far, out of all .xmi files found
+  }
+
+}

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java?rev=1411758&r1=1411757&r2=1411758&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java Tue Nov 20 17:44:30 2012
@@ -25,6 +25,8 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
 
 import org.apache.log4j.Logger;
 import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -53,7 +55,14 @@ import org.cleartk.eval.AnnotationStatis
 import org.cleartk.eval.Evaluation_ImplBase;
 import org.cleartk.util.Options_ImplBase;
 import org.kohsuke.args4j.Option;
+import org.kohsuke.args4j.spi.BooleanOptionHandler;
 import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
@@ -82,7 +91,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 
-public class AssertionEvalBasedOnModifier extends Evaluation_ImplBase<File, AnnotationStatistics> {
+public class AssertionEvalBasedOnModifier extends Evaluation_ImplBase<File, Map<String, AnnotationStatistics>> {
   
   private static Logger logger = Logger.getLogger(AssertionEvalBasedOnModifier.class); 
 
@@ -105,18 +114,58 @@ public class AssertionEvalBasedOnModifie
         required = true)
     public File modelsDirectory;
     
+    @Option(
+            name = "--run-polarity",
+            usage = "specify whether polarity processing should be run (true or false). default: true",
+            required = false)
+    public boolean runPolarity = true;
+        
+    @Option(
+            name = "--run-conditional",
+            usage = "specify whether conditional processing should be run (true or false). default: true",
+            required = false)
+    public boolean runConditional = true;
+        
+    @Option(
+            name = "--run-uncertainty",
+            usage = "specify whether uncertainty processing should be run (true or false). default: true",
+            required = false)
+    public boolean runUncertainty = true;
+        
+    @Option(
+            name = "--run-subject",
+            usage = "specify whether subject processing should be run (true or false). default: true",
+            required = false,
+            handler=BooleanOptionHandler.class)
+    public boolean runSubject = true;
+        
+    @Option(
+            name = "--run-generic",
+            usage = "specify whether generic processing should be run (true or false). default: true",
+            required = false)
+    public boolean runGeneric = true;
+        
     
   }
+  
+  protected ArrayList<String> annotationTypes;
 
   private Class<? extends AssertionCleartkAnalysisEngine> classifierAnnotatorClass;
 
   private Class<? extends DataWriterFactory<String>> dataWriterFactoryClass;
 
   
+  protected static Options options = new Options();
   
   public static void main(String[] args) throws Exception {
-    Options options = new Options();
+    //Options options = new Options();
     options.parseOptions(args);
+    
+    System.err.println("forcing skipping of subject processing!!!");
+    options.runSubject = false;
+    System.err.println("forcing skipping of generic processing!!!");
+    options.runGeneric = false;
+    printOptionsForDebugging(options);
     List<File> trainFiles = Arrays.asList(options.trainDirectory.listFiles());
     //File modelsDir = new File("models/modifier");
     File modelsDir = options.modelsDirectory;
@@ -134,9 +183,17 @@ public class AssertionEvalBasedOnModifie
     
     Class<? extends AssertionCleartkAnalysisEngine> annotatorClass = AssertionCleartkAnalysisEngine.class;
 
+    //String [] annotationTypes = { "polarity", "conditional", "uncertainty", "subject", "generic" };
+    ArrayList<String> annotationTypes = new ArrayList<String>();
+    if (options.runPolarity) { annotationTypes.add("polarity"); }
+    if (options.runConditional) { annotationTypes.add("conditional"); }
+    if (options.runUncertainty) { annotationTypes.add("uncertainty"); }
+    if (options.runSubject) { annotationTypes.add("subject"); }
+    if (options.runGeneric) { annotationTypes.add("generic"); }
     
     AssertionEvalBasedOnModifier evaluation = new AssertionEvalBasedOnModifier(
         modelsDir,
+        annotationTypes,
         annotatorClass,
         dataWriterFactoryClass
         );
@@ -164,19 +221,26 @@ public class AssertionEvalBasedOnModifie
     
     if(options.testDirectory == null) {
       // run n-fold cross-validation
-      List<AnnotationStatistics> foldStats = evaluation.crossValidation(trainFiles, 2);
+      List<Map<String, AnnotationStatistics>> foldStats = evaluation.crossValidation(trainFiles, 2);
       //AnnotationStatistics overallStats = AnnotationStatistics.addAll(foldStats);
-      AnnotationStatistics overallStats = new AnnotationStatistics();
-      for (AnnotationStatistics singleFoldStats : foldStats)
+      Map<String, AnnotationStatistics> overallStats = new TreeMap<String, AnnotationStatistics>();
+      
+      for (String currentAnnotationType : annotationTypes)
+      {
+    	  AnnotationStatistics currentAnnotationStatistics = new AnnotationStatistics();
+    	  overallStats.put(currentAnnotationType, currentAnnotationStatistics);
+      }
+      for (Map<String, AnnotationStatistics> singleFoldMap : foldStats)
       {
-        overallStats.addAll(singleFoldStats);
+    	  for (String currentAnnotationType : annotationTypes)
+    	  {
+    	    AnnotationStatistics currentFoldStatistics = singleFoldMap.get(currentAnnotationType);
+    	    overallStats.get(currentAnnotationType).addAll(currentFoldStatistics);
+    	  }
       }
       
-      System.err.println("overall:");
-      System.err.print(overallStats);
-      System.err.println(overallStats.confusions());
-      System.err.println();
-
+      AssertionEvalBasedOnModifier.printScore(overallStats,  "CROSS FOLD OVERALL");
+      
     } else {
       // train on the entire training set and evaluate on the test set
       List<File> testFiles = Arrays.asList(options.testDirectory.listFiles());
@@ -185,21 +249,65 @@ public class AssertionEvalBasedOnModifie
       evaluation.train(trainCollectionReader, modelsDir);
       
       CollectionReader testCollectionReader = evaluation.getCollectionReader(testFiles);
-      AnnotationStatistics stats = evaluation.test(testCollectionReader, modelsDir);
-      return;
+      Map<String, AnnotationStatistics> stats = evaluation.test(testCollectionReader, modelsDir);
+      
+      AssertionEvalBasedOnModifier.printScore(stats,  modelsDir.getAbsolutePath());
     }
     
+    System.out.println("Finished.");
+    
+  }
+  
+  private static void printOptionsForDebugging(Options options)
+  {
+	// The test directory is optional (main() cross-validates when it is null),
+	// so guard it; %s prints a null argument as "null" instead of failing.
+	File testDir = options.testDirectory;
+	System.out.format(
+		"training dir: %s%n" +
+	    "test dir: %s%n" + 
+	    "model dir: %s%n" +
+	    "run polarity: %b%n" + "run conditional: %b%n" + "run uncertainty: %b%n" +
+	    "run subject: %b%n" + "run generic: %b%n" +
+	    "%n%n",
+	    options.trainDirectory.getAbsolutePath(),
+	    (testDir == null) ? null : testDir.getAbsolutePath(),
+	    options.modelsDirectory.getAbsolutePath(),
+	    options.runPolarity,
+	    options.runConditional,
+	    options.runUncertainty,
+	    options.runSubject,
+	    options.runGeneric
+	    );
+  }
+
+public static void printScore(Map<String, AnnotationStatistics> map, String directory)
+  {
+      // Prints one report per map entry, in the map's own iteration order. Each
+      // report names the given directory label, the upper-cased assertion type,
+      // and the toString() rendering of that type's statistics.
+      final String reportFormat = "directory: \"%s\"; assertion type: %s%n%n%s%n%n";
+
+      for (Map.Entry<String, AnnotationStatistics> typeAndStats : map.entrySet())
+      {
+        String upperCasedType = typeAndStats.getKey().toUpperCase();
+        String renderedStats = typeAndStats.getValue().toString();
+        System.out.format(reportFormat, directory, upperCasedType, renderedStats);
+      }
+  }
 
   private String[] trainingArguments;
 
   public AssertionEvalBasedOnModifier(
       File directory,
+      ArrayList<String> annotationTypes,
       Class<? extends AssertionCleartkAnalysisEngine> classifierAnnotatorClass,
       Class<? extends DataWriterFactory<String>> dataWriterFactoryClass,
       String... trainingArguments
       ) {
     super(directory);
+    
+    this.annotationTypes = annotationTypes;
 
     this.classifierAnnotatorClass = classifierAnnotatorClass;
     this.dataWriterFactoryClass = dataWriterFactoryClass;
@@ -216,7 +324,7 @@ public class AssertionEvalBasedOnModifie
     }
     return CollectionReaderFactory.createCollectionReader(
         XMIReader.class,
-        TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("../common-type-system/desc/common_type_system.xml"),
+        TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(),
         XMIReader.PARAM_FILES,
         paths);
   }
@@ -244,17 +352,80 @@ public class AssertionEvalBasedOnModifie
     AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
     builder.add(assertionAttributeClearerAnnotator);
     
-    AnalysisEngineDescription assertionAnnotator = AnalysisEngineFactory.createPrimitiveDescription(AssertionCleartkAnalysisEngine.class); //,  this.additionalParamemters);
-    ConfigurationParameterFactory.addConfigurationParameters(
-        assertionAnnotator,
-        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
-        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
-        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
-        this.dataWriterFactoryClass.getName(),
-        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
-        directory.getPath()
-        );
-    builder.add(assertionAnnotator);
+    if (options.runPolarity)
+    {
+	    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+	    ConfigurationParameterFactory.addConfigurationParameters(
+	        polarityAnnotator,
+	        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+	        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+	        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+	        this.dataWriterFactoryClass.getName(),
+	        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+	        new File(directory, "polarity").getPath()
+	        );
+	    builder.add(polarityAnnotator);
+    }
+
+    if (options.runConditional)
+    {
+	    AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+	    ConfigurationParameterFactory.addConfigurationParameters(
+	        conditionalAnnotator,
+	        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+	        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+	        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+	        this.dataWriterFactoryClass.getName(),
+	        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+	        new File(directory, "conditional").getPath()
+	        );
+	    builder.add(conditionalAnnotator);
+    }
+
+    if (options.runUncertainty)
+    {
+	    AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+	    ConfigurationParameterFactory.addConfigurationParameters(
+	        uncertaintyAnnotator,
+	        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+	        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+	        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+	        this.dataWriterFactoryClass.getName(),
+	        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+	        new File(directory, "uncertainty").getPath()
+	        );
+	    builder.add(uncertaintyAnnotator);
+    }
+
+    if (options.runSubject)
+    {
+	    AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+	    ConfigurationParameterFactory.addConfigurationParameters(
+	        subjectAnnotator,
+	        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+	        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+	        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+	        this.dataWriterFactoryClass.getName(),
+	        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+	        new File(directory, "subject").getPath()
+	        );
+	    builder.add(subjectAnnotator);
+    }
+
+    if (options.runGeneric)
+    {
+		AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+		ConfigurationParameterFactory.addConfigurationParameters(
+		    genericAnnotator,
+		    AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+		    AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+		    CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+		    this.dataWriterFactoryClass.getName(),
+		    DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+		    new File(directory, "generic").getPath()
+		    );
+		builder.add(genericAnnotator);
+    }
 
 /*
     AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
@@ -274,12 +445,16 @@ public class AssertionEvalBasedOnModifie
     SimplePipeline.runPipeline(collectionReader,  builder.createAggregateDescription());
     
     HideOutput hider = new HideOutput();
-    JarClassifierBuilder.trainAndPackage(directory, this.trainingArguments);
+    for (String currentAssertionAttribute : annotationTypes)
+    {
+    	File currentDirectory = new File(directory, currentAssertionAttribute);
+    	JarClassifierBuilder.trainAndPackage(currentDirectory, trainingArguments);
+    }
     hider.restoreOutput();
   }
 
   @Override
-  protected AnnotationStatistics test(CollectionReader collectionReader, File directory)
+  protected Map<String, AnnotationStatistics> test(CollectionReader collectionReader, File directory)
       throws Exception {
 //    AnalysisEngine classifierAnnotator = AnalysisEngineFactory.createPrimitive(AssertionCleartkAnalysisEngine.getDescription(
 //        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
@@ -293,21 +468,107 @@ public class AssertionEvalBasedOnModifie
     AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
     builder.add(assertionAttributeClearerAnnotator);
     
-    AnalysisEngineDescription assertionAnnotator = AnalysisEngineFactory.createPrimitiveDescription(AssertionCleartkAnalysisEngine.class); //,  this.additionalParamemters);
-    ConfigurationParameterFactory.addConfigurationParameters(
-        assertionAnnotator,
-        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
-        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
-        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
-        new File(directory, "model.jar").getPath()
-        );
-    builder.add(assertionAnnotator);
+    if (options.runPolarity)
+    {
+	    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+	    ConfigurationParameterFactory.addConfigurationParameters(
+	        polarityAnnotator,
+	        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+	        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+	        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+	        new File(new File(directory, "polarity"), "model.jar").getPath()
+	        );
+	    builder.add(polarityAnnotator);
+    }
+    
+    if (options.runConditional)
+    {
+	    AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+	    ConfigurationParameterFactory.addConfigurationParameters(
+	        conditionalAnnotator,
+	        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+	        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+	        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+	        new File(new File(directory, "conditional"), "model.jar").getPath()
+	        );
+	    builder.add(conditionalAnnotator);
+    }
+    
+    if (options.runUncertainty)
+    {
+	    AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+	    ConfigurationParameterFactory.addConfigurationParameters(
+	        uncertaintyAnnotator,
+	        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+	        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+	        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+	        new File(new File(directory, "uncertainty"), "model.jar").getPath()
+	        );
+	    builder.add(uncertaintyAnnotator);
+    }
+    
+    if (options.runSubject)
+    {
+	    AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+	    ConfigurationParameterFactory.addConfigurationParameters(
+	        subjectAnnotator,
+	        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+	        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+	        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+	        new File(new File(directory, "subject"), "model.jar").getPath()
+	        );
+	    builder.add(subjectAnnotator);
+    }
+    
+    if (options.runGeneric)
+    {
+	    AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+	    ConfigurationParameterFactory.addConfigurationParameters(
+	        genericAnnotator,
+	        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+	        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+	        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+	        new File(new File(directory, "generic"), "model.jar").getPath()
+	        );
+	    builder.add(genericAnnotator);
+    }
     
     //SimplePipeline.runPipeline(collectionReader,  builder.createAggregateDescription());
     AnalysisEngineDescription aggregateDescription = builder.createAggregateDescription();
     AnalysisEngine aggregate = builder.createAggregate();
     
-    AnnotationStatistics stats = new AnnotationStatistics();
+    AnnotationStatistics polarityStats = new AnnotationStatistics();
+    AnnotationStatistics conditionalStats = new AnnotationStatistics();
+    AnnotationStatistics uncertaintyStats = new AnnotationStatistics();
+    AnnotationStatistics subjectStats = new AnnotationStatistics();
+    AnnotationStatistics genericStats = new AnnotationStatistics();
+    
+    Map<String, AnnotationStatistics> map = new TreeMap<String, AnnotationStatistics>(); 
+    if (options.runPolarity)
+    {
+      map.put("polarity",  polarityStats);
+    }
+
+    if (options.runConditional)
+    {
+      map.put("conditional",  conditionalStats);
+    }
+
+    if (options.runUncertainty)
+    {
+      map.put("uncertainty",  uncertaintyStats);
+    }
+
+    if (options.runSubject)
+    {
+      map.put("subject", subjectStats);
+    }
+
+    if (options.runGeneric)
+    {
+      map.put("generic", genericStats);
+    }
+
     for (JCas jCas : new JCasIterable(collectionReader, aggregate)) {
       JCas goldView;
       try {
@@ -315,21 +576,62 @@ public class AssertionEvalBasedOnModifie
       } catch (CASException e) {
         throw new AnalysisEngineProcessException(e);
       }
-      Collection<IdentifiedAnnotation> goldEntities = new ArrayList<IdentifiedAnnotation>(); 
-      goldEntities.addAll(JCasUtil.select(goldView, EntityMention.class));
-      goldEntities.addAll(JCasUtil.select(goldView, EventMention.class));
-      
-      Collection<IdentifiedAnnotation> systemEntities = new ArrayList<IdentifiedAnnotation>();
-      systemEntities.addAll(JCasUtil.select(jCas, EntityMention.class));
-      systemEntities.addAll(JCasUtil.select(jCas, EventMention.class));
-      
-      stats.add(goldEntities, systemEntities,
-		  AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
-		  AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("polarity"));
-    }
-    System.err.println(directory.getName() + ":");
-    System.err.println(stats);
-    return stats;
+      
+      String documentId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+      System.out.format("document id: %s%n", documentId);
+      
+      Collection<IdentifiedAnnotation> goldEntitiesAndEvents = new ArrayList<IdentifiedAnnotation>(); 
+      Collection<EntityMention> goldEntities = JCasUtil.select(goldView, EntityMention.class);
+	  goldEntitiesAndEvents.addAll(goldEntities);
+      Collection<EventMention> goldEvents = JCasUtil.select(goldView, EventMention.class);
+      goldEntitiesAndEvents.addAll(goldEvents);
+      System.out.format("gold entities: %d%ngold events: %d%n%n", goldEntities.size(), goldEvents.size());
+      
+      Collection<IdentifiedAnnotation> systemEntitiesAndEvents = new ArrayList<IdentifiedAnnotation>();
+      Collection<EntityMention> systemEntities = JCasUtil.select(jCas, EntityMention.class);
+      systemEntitiesAndEvents.addAll(systemEntities);
+      Collection<EventMention> systemEvents = JCasUtil.select(jCas, EventMention.class);
+      systemEntitiesAndEvents.addAll(systemEvents);
+      System.out.format("system entities: %d%nsystem events: %d%n%n", systemEntities.size(), systemEvents.size());
+      
+      if (options.runPolarity)
+      {
+	      polarityStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("polarity"));
+      }
+
+      if (options.runConditional)
+      {
+	      conditionalStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("conditional"));
+      }
+
+      if (options.runUncertainty)
+      {
+	      uncertaintyStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("uncertainty"));
+      }
+
+      if (options.runSubject)
+      {
+	      subjectStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("subject"));
+      }
+
+      if (options.runGeneric)
+      {
+	      genericStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+			  AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("generic"));
+      }
+
+    }
+    
+    return map;
   }
 
   public static final String GOLD_VIEW_NAME = "GoldView";
@@ -413,134 +715,134 @@ public class AssertionEvalBasedOnModifie
     }
   }
   
-  public static class ReplaceGoldEntityMentionsAndModifiersWithCTakes extends
-      JCasAnnotator_ImplBase
-  {
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException
-    {
-      JCas goldView, systemView;
-      try
-      {
-        goldView = jCas.getView(GOLD_VIEW_NAME);
-        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
-      } catch (CASException e)
-      {
-        throw new AnalysisEngineProcessException(e);
-      }
-
-      // remove manual EntityMentions and Modifiers from gold view
-      List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
-      goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
-      goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
-      for (IdentifiedAnnotation goldMention : goldMentions)
-      {
-        goldMention.removeFromIndexes();
-      }
-
-      // copy cTAKES EntityMentions and Modifiers to gold view
-      List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
-      cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
-      cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
-      CasCopier copier = new CasCopier(systemView.getCas(), goldView.getCas());
-      for (IdentifiedAnnotation cTakesMention : cTakesMentions)
-      {
-        Annotation copy = (Annotation) copier.copyFs(cTakesMention);
-        Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
-        copy.setFeatureValue(sofaFeature, goldView.getSofa());
-        copy.addToIndexes();
-      }
-
-      // replace gold EntityMentions and Modifiers in relations with cTAKES ones
-      List<BinaryTextRelation> relations = new ArrayList<BinaryTextRelation>();
-      relations.addAll(JCasUtil.select(goldView, BinaryTextRelation.class));
-      for (BinaryTextRelation relation : relations)
-      {
-
-        // attempt to replace the gold RelationArguments with system ones
-        int replacedArgumentCount = 0;
-        for (RelationArgument relArg : Arrays.asList(relation.getArg1(),
-            relation.getArg2()))
-        {
-          Annotation goldArg = relArg.getArgument();
-          Class<? extends Annotation> argClass = goldArg.getClass();
-
-          // find all annotations covered by the gold argument and of the same
-          // class (these should
-          // be the ones copied over from the cTAKES output earlier)
-          List<? extends Annotation> systemArgs = JCasUtil.selectCovered(
-              goldView, argClass, goldArg);
-
-          // no ctakes annotation found
-          if (systemArgs.size() == 0)
-          {
-            String word = "no";
-            String className = argClass.getSimpleName();
-            String argText = goldArg.getCoveredText();
-            String message = String.format("%s %s for \"%s\"", word, className,
-                argText);
-            this.getContext().getLogger().log(Level.FINE, message);
-            continue;
-          }
-
-          // if there's exactly one annotation, replace the gold one with that
-          if (systemArgs.size() == 1)
-          {
-            relArg.setArgument(systemArgs.get(0));
-            replacedArgumentCount += 1;
-          }
-
-          else
-          {
-            // multiple ctakes arguments found; look for one that matches
-            // exactly
-            // e.g. gold: "right breast", ctakes: "right breast", "breast"
-            for (Annotation systemArg : systemArgs)
-            {
-              String goldArgText = goldArg.getCoveredText();
-              String systemArgText = systemArg.getCoveredText();
-              if (systemArgText.equals(goldArgText))
-              {
-                relArg.setArgument(systemArg);
-                replacedArgumentCount += 1;
-              }
-            }
-
-            if (replacedArgumentCount < 1)
-            {
-              // issue a warning message
-              String word = "multiple";
-              String className = argClass.getSimpleName();
-              String argText = goldArg.getCoveredText();
-              String message = String.format("%s %s for \"%s\"", word,
-                  className, argText);
-              this.getContext().getLogger().log(Level.FINE, message);
-
-              System.out.println("gold argument: " + goldArg.getCoveredText());
-              System.out.println("gold type: "
-                  + ((IdentifiedAnnotation) goldArg).getTypeID());
-              for (Annotation systemArg : systemArgs)
-              {
-                System.out.println("ctakes argument: "
-                    + systemArg.getCoveredText());
-                System.out.println("ctakes type: "
-                    + ((IdentifiedAnnotation) systemArg).getTypeID());
-              }
-              System.out.println();
-            }
-          }
-        }
-
-        // if replacements were not found for both arguments, remove the
-        // relation
-        if (replacedArgumentCount < 2)
-        {
-          relation.removeFromIndexes();
-        }
-      }
-    }
-  }
+//  public static class ReplaceGoldEntityMentionsAndModifiersWithCTakes extends
+//      JCasAnnotator_ImplBase
+//  {
+//
+//    @Override
+//    public void process(JCas jCas) throws AnalysisEngineProcessException
+//    {
+//      JCas goldView, systemView;
+//      try
+//      {
+//        goldView = jCas.getView(GOLD_VIEW_NAME);
+//        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+//      } catch (CASException e)
+//      {
+//        throw new AnalysisEngineProcessException(e);
+//      }
+//
+//      // remove manual EntityMentions and Modifiers from gold view
+//      List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
+//      goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
+//      goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
+//      for (IdentifiedAnnotation goldMention : goldMentions)
+//      {
+//        goldMention.removeFromIndexes();
+//      }
+//
+//      // copy cTAKES EntityMentions and Modifiers to gold view
+//      List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
+//      cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
+//      cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
+//      CasCopier copier = new CasCopier(systemView.getCas(), goldView.getCas());
+//      for (IdentifiedAnnotation cTakesMention : cTakesMentions)
+//      {
+//        Annotation copy = (Annotation) copier.copyFs(cTakesMention);
+//        Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
+//        copy.setFeatureValue(sofaFeature, goldView.getSofa());
+//        copy.addToIndexes();
+//      }
+//
+//      // replace gold EntityMentions and Modifiers in relations with cTAKES ones
+//      List<BinaryTextRelation> relations = new ArrayList<BinaryTextRelation>();
+//      relations.addAll(JCasUtil.select(goldView, BinaryTextRelation.class));
+//      for (BinaryTextRelation relation : relations)
+//      {
+//
+//        // attempt to replace the gold RelationArguments with system ones
+//        int replacedArgumentCount = 0;
+//        for (RelationArgument relArg : Arrays.asList(relation.getArg1(),
+//            relation.getArg2()))
+//        {
+//          Annotation goldArg = relArg.getArgument();
+//          Class<? extends Annotation> argClass = goldArg.getClass();
+//
+//          // find all annotations covered by the gold argument and of the same
+//          // class (these should
+//          // be the ones copied over from the cTAKES output earlier)
+//          List<? extends Annotation> systemArgs = JCasUtil.selectCovered(
+//              goldView, argClass, goldArg);
+//
+//          // no ctakes annotation found
+//          if (systemArgs.size() == 0)
+//          {
+//            String word = "no";
+//            String className = argClass.getSimpleName();
+//            String argText = goldArg.getCoveredText();
+//            String message = String.format("%s %s for \"%s\"", word, className,
+//                argText);
+//            this.getContext().getLogger().log(Level.FINE, message);
+//            continue;
+//          }
+//
+//          // if there's exactly one annotation, replace the gold one with that
+//          if (systemArgs.size() == 1)
+//          {
+//            relArg.setArgument(systemArgs.get(0));
+//            replacedArgumentCount += 1;
+//          }
+//
+//          else
+//          {
+//            // multiple ctakes arguments found; look for one that matches
+//            // exactly
+//            // e.g. gold: "right breast", ctakes: "right breast", "breast"
+//            for (Annotation systemArg : systemArgs)
+//            {
+//              String goldArgText = goldArg.getCoveredText();
+//              String systemArgText = systemArg.getCoveredText();
+//              if (systemArgText.equals(goldArgText))
+//              {
+//                relArg.setArgument(systemArg);
+//                replacedArgumentCount += 1;
+//              }
+//            }
+//
+//            if (replacedArgumentCount < 1)
+//            {
+//              // issue a warning message
+//              String word = "multiple";
+//              String className = argClass.getSimpleName();
+//              String argText = goldArg.getCoveredText();
+//              String message = String.format("%s %s for \"%s\"", word,
+//                  className, argText);
+//              this.getContext().getLogger().log(Level.FINE, message);
+//
+//              System.out.println("gold argument: " + goldArg.getCoveredText());
+//              System.out.println("gold type: "
+//                  + ((IdentifiedAnnotation) goldArg).getTypeID());
+//              for (Annotation systemArg : systemArgs)
+//              {
+//                System.out.println("ctakes argument: "
+//                    + systemArg.getCoveredText());
+//                System.out.println("ctakes type: "
+//                    + ((IdentifiedAnnotation) systemArg).getTypeID());
+//              }
+//              System.out.println();
+//            }
+//          }
+//        }
+//
+//        // if replacements were not found for both arguments, remove the
+//        // relation
+//        if (replacedArgumentCount < 2)
+//        {
+//          relation.removeFromIndexes();
+//        }
+//      }
+//    }
+//  }
   
   /**
    * Class that copies the manual {@link Modifier} annotations to the default CAS.

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1411758&r1=1411757&r2=1411758&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Tue Nov 20 17:44:30 2012
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -66,7 +66,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 
-public class AssertionCleartkAnalysisEngine extends
+public abstract class AssertionCleartkAnalysisEngine extends
     CleartkAnnotator<String>
 {
   Logger logger = Logger.getLogger(AssertionCleartkAnalysisEngine.class);
@@ -170,6 +170,7 @@ public class AssertionCleartkAnalysisEng
 
   }
 
+  public abstract void setClassLabel(IdentifiedAnnotation entityMention, Instance<String> instance) throws AnalysisEngineProcessException;
 
 
   @Override
@@ -271,32 +272,7 @@ public class AssertionCleartkAnalysisEng
         instance.addAll(extractor.extract(identifiedAnnotationView, entityMention));
       }
       
-      if (this.isTraining())
-      {
-        String polarity = (entityMention.getPolarity() == -1) ? "negated" : "present";
-        instance.setOutcome(polarity);
-        if ("negated".equals(polarity))
-        {
-          logger.info("TRAINING: " + polarity);
-        }
-        this.dataWriter.write(instance);
-      } else
-      {
-        String label = this.classifier.classify(instance.getFeatures());
-        int polarity = 1;
-        if (label!= null && label.equals("present"))
-        {
-          polarity = 0;
-        } else if (label != null && label.equals("negated"))
-        {
-          polarity = -1;
-        }
-        entityMention.setPolarity(polarity);
-        if ("negated".equals(label))
-        {
-          logger.info(String.format("DECODING/EVAL: %s//%s [%d-%d] (%s)", label, polarity, entityMention.getBegin(), entityMention.getEnd(), entityMention.getClass().getName()));
-        }
-      }
+      setClassLabel(entityMention, instance);
       
     }
     

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.cleartk.classifier.Instance;
+
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+
+/**
+ * Assertion annotator for the boolean "conditional" attribute of an
+ * {@link IdentifiedAnnotation}.
+ */
+public class ConditionalCleartkAnalysisEngine extends
+		AssertionCleartkAnalysisEngine {
+
+	/**
+	 * In training mode, records the mention's conditional flag as the instance
+	 * outcome and writes the instance; otherwise classifies the instance and
+	 * stores the result on the mention (any label other than "conditional",
+	 * including null, yields false).
+	 */
+	@Override
+	public void setClassLabel(IdentifiedAnnotation entityMention,
+			Instance<String> instance) throws AnalysisEngineProcessException {
+		if (!this.isTraining()) {
+			String predicted = this.classifier.classify(instance.getFeatures());
+			entityMention.setConditional("conditional".equals(predicted));
+			return;
+		}
+		boolean isConditional = entityMention.getConditional();
+		instance.setOutcome(isConditional ? "conditional" : "nonconditional");
+		this.dataWriter.write(instance);
+	}
+}

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1411758&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Tue Nov 20 17:44:30 2012
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.cleartk.classifier.Instance;
+
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+
+/** Assertion annotator for the "generic" attribute of an {@link IdentifiedAnnotation}. */
+public class GenericCleartkAnalysisEngine extends
+		AssertionCleartkAnalysisEngine {
+	/**
+	 * Trains on, or predicts, the mention's generic flag. Reads and writes the
+	 * generic attribute (not subject, which belongs to the subject annotator).
+	 */
+	@Override
+	public void setClassLabel(IdentifiedAnnotation entityMention,
+			Instance<String> instance) throws AnalysisEngineProcessException {
+		if (this.isTraining()) {
+			instance.setOutcome(entityMention.getGeneric() ? "generic" : "nongeneric");
+			this.dataWriter.write(instance);
+		} else {
+			String label = this.classifier.classify(instance.getFeatures());
+			entityMention.setGeneric("generic".equals(label)); // any other label (or null) means not generic
+		}
+	}
+}