You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2013/05/20 17:34:30 UTC

svn commit: r1484500 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java

Author: james-masanz
Date: Mon May 20 15:34:30 2013
New Revision: 1484500

URL: http://svn.apache.org/r1484500
Log:
annotator for reading XMI containing gold standard annotations and copying them into a new view of the current CAS

Added:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java   (with props)

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java?rev=1484500&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java Mon May 20 15:34:30 2013
@@ -0,0 +1,271 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.ResourceMetaData;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.util.CasCreationUtils;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.component.NoOpAnnotator;
+import org.uimafit.component.ViewCreatorAnnotator;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Annotator that reads, via XMIReader, the file holding the gold standard annotations for the
+ * current document and copies those annotations into a gold view within the current CAS.
+ * Written to handle testing the cTAKES 2.5 assertion (polarity) value against the
+ * gold standard, using XMI that had already been created by the Apache cTAKES 3.0 gold standard reader.
+ */
+public class MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas extends JCasAnnotator_ImplBase {
+
+	static final Logger LOGGER = Logger.getLogger(MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.class.getName());
+
+	/** Directory searched for the files that contain the gold annotations. */
+	private static final String DIR_WITH_GOLD_VIEWS = "/SHARP-data/assertion/UMLS_CEM-gold-combined"; // TODO parameterize this
+	private static final File GOLD_VIEW_DIR = new File(DIR_WITH_GOLD_VIEWS);
+
+	/** Suffixes appended to the document ID, in this order, when looking for its gold file. */
+	private static final String[] GOLD_FILE_SUFFIXES = { "", ".xml", ".xcas", ".xmi", ".xcas.xml", ".xmi.xml" };
+
+	@Override
+	public void initialize(UimaContext context) throws ResourceInitializationException {
+		super.initialize(context);
+	}
+
+	/**
+	 * Copies every EventMention and EntityMention of the gold CAS that corresponds to the given
+	 * CAS into the view named AssertionEvaluation.GOLD_VIEW_NAME of the given CAS.
+	 *
+	 * @param jCas the CAS being processed; its sofa text and MIME type are reused for the gold view
+	 * @throws AnalysisEngineProcessException if ViewCreatorAnnotator.createViewSafely fails
+	 * @throws RuntimeException if the gold file cannot be found or loaded, or holds no annotations
+	 */
+	@Override
+	public void process(JCas jCas) throws AnalysisEngineProcessException {
+		String docId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+		JCas correspondingCasThatHasGoldAnnotations = getCorrespondingCasThatHasGoldAnnotations(docId);
+		JCas viewWithPreexistingGoldAnnotations = getViewWithGoldAnnotations(correspondingCasThatHasGoldAnnotations);
+
+		// Create the new view in the current CAS so the current CAS has both views
+		JCas newGoldView = ViewCreatorAnnotator.createViewSafely(jCas, AssertionEvaluation.GOLD_VIEW_NAME);
+		newGoldView.setSofaDataString(jCas.getSofaDataString(), jCas.getSofaMimeType());
+
+		Collection<? extends Annotation> annotations = JCasUtil.select(viewWithPreexistingGoldAnnotations, Annotation.class);
+		LOGGER.debug("Found " + annotations.size() + " annotations.");
+		int countCopied = 0;
+		for (Annotation a : annotations) {
+			if (isInstanceOfOneOfClassesToCopy(a)) {
+				copyAnnotation(a, newGoldView);
+				countCopied++;
+			}
+		}
+
+		LOGGER.debug("Copied " + countCopied + " gold annotations out of " + annotations.size() + " to cas, which now has " + newGoldView.getAnnotationIndex().size() + " indexed annotations in " +newGoldView.getViewName());
+		LOGGER.debug(" and has " + jCas.getAnnotationIndex().size() + " indexed annotations in " +jCas.getViewName());
+	}
+
+	/**
+	 * Selects the view to copy gold annotations from: the view named
+	 * AssertionEvaluation.GOLD_VIEW_NAME when it can be obtained, otherwise the given view itself.
+	 *
+	 * @param correspondingCasThatHasGoldAnnotations CAS loaded from the gold file; must not be null
+	 * @return the view holding the gold annotations, never null
+	 * @throws IllegalArgumentException if the given CAS is null
+	 * @throws IllegalStateException if the given view has to be used but has no indexed annotations
+	 */
+	private static JCas getViewWithGoldAnnotations(JCas correspondingCasThatHasGoldAnnotations) {
+		if (correspondingCasThatHasGoldAnnotations == null) throw new IllegalArgumentException("CAS that should contain the gold annotations is null");
+		try {
+			JCas goldView = correspondingCasThatHasGoldAnnotations.getView(AssertionEvaluation.GOLD_VIEW_NAME);
+			if (goldView != null) {
+				return goldView;
+			}
+		} catch (org.apache.uima.cas.CASRuntimeException cre) {
+			LOGGER.debug("Unable to get view " + AssertionEvaluation.GOLD_VIEW_NAME + ", falling back to the given view", cre);
+		} catch (org.apache.uima.cas.CASException viewException) {
+			LOGGER.debug("Unable to get view " + AssertionEvaluation.GOLD_VIEW_NAME + ", falling back to the given view", viewException);
+		}
+
+		JCas fallbackView = correspondingCasThatHasGoldAnnotations;
+		LOGGER.debug("Using view " + fallbackView.getViewName());
+		int n = fallbackView.getAnnotationIndex().size();
+		LOGGER.debug("With " + n + " annotations");
+		if (n == 0) {
+			Iterator<CAS> iter = fallbackView.getCas().getViewIterator();
+			while (iter.hasNext()) {
+				CAS cas = iter.next();
+				LOGGER.debug("view " + cas.getViewName() + " has " + cas.getAnnotationIndex().size() + " indexed annotations.");
+			}
+			throw new IllegalStateException("No indexed annotations in view " + fallbackView.getViewName() + " of the gold CAS");
+		}
+		return fallbackView;
+	}
+
+	/**
+	 * Creates an IdentifiedAnnotation in the given view that has the same span and the same
+	 * conditional, confidence, discovery technique, generic, history-of, polarity, segment ID,
+	 * sentence ID, subject, type ID and uncertainty values as the gold annotation, and indexes it.
+	 *
+	 * @param goldAnnotation annotation to copy; must be an IdentifiedAnnotation
+	 * @param jcas view in which the copy is created and indexed
+	 * @throws RuntimeException if goldAnnotation is not an IdentifiedAnnotation
+	 */
+	private static void copyAnnotation(Annotation goldAnnotation, JCas jcas) {
+		if (!(goldAnnotation instanceof IdentifiedAnnotation)) {
+			throw new RuntimeException("Unexpected class of object " + goldAnnotation.getClass());
+		}
+		IdentifiedAnnotation gold = (IdentifiedAnnotation) goldAnnotation;
+		// NOTE(review): the copy is a plain IdentifiedAnnotation even for an EventMention/EntityMention - confirm this is intended
+		IdentifiedAnnotation copy = new IdentifiedAnnotation(jcas);
+		copy.setConditional(gold.getConditional());
+		copy.setConfidence(gold.getConfidence());
+		copy.setDiscoveryTechnique(gold.getDiscoveryTechnique());
+		copy.setGeneric(gold.getGeneric());
+		copy.setHistoryOf(gold.getHistoryOf());
+		copy.setPolarity(gold.getPolarity());
+		copy.setSegmentID(gold.getSegmentID());
+		copy.setSentenceID(gold.getSentenceID());
+		copy.setSubject(gold.getSubject());
+		copy.setTypeID(gold.getTypeID());
+		copy.setUncertainty(gold.getUncertainty());
+		copy.setBegin(gold.getBegin());
+		copy.setEnd(gold.getEnd());
+		copy.addToIndexes();
+	}
+
+	/** Returns true if the annotation is an EventMention or an EntityMention. */
+	private static boolean isInstanceOfOneOfClassesToCopy(Annotation a) {
+		return a instanceof EventMention || a instanceof EntityMention;
+	}
+
+	/**
+	 * Loads the gold CAS for the given document ID from the gold directory. The file used is the
+	 * first existing one among the document ID followed by each of GOLD_FILE_SUFFIXES.
+	 *
+	 * @param docId document ID of the CAS being processed; must not be null
+	 * @return the CAS loaded from the gold file
+	 * @throws RuntimeException if docId is null, no candidate file exists or the file cannot be loaded
+	 */
+	private static JCas getCorrespondingCasThatHasGoldAnnotations(String docId) {
+		if (docId == null) throw new IllegalArgumentException("Document ID is null");
+		for (String suffix : GOLD_FILE_SUFFIXES) {
+			File f = new File(GOLD_VIEW_DIR, docId + suffix);
+			if (f.exists()) {
+				return getJcas(f);
+			}
+		}
+
+		String message = "Unable to find file for doc ID " + docId + " in " + GOLD_VIEW_DIR.getName();
+		try {
+			message += " aka " + GOLD_VIEW_DIR.getCanonicalPath();
+		} catch (IOException ignored) {
+			// The canonical path is only extra detail; report the failure without it
+		}
+		throw new RuntimeException(message);
+	}
+
+	/**
+	 * Reads the first document provided by an XMIReader over the given file into a new CAS and
+	 * runs a no-op analysis engine on it.
+	 *
+	 * @param f file containing the serialized CAS
+	 * @return the JCas populated from the file
+	 * @throws RuntimeException if the reader provides no document, or wrapping any UIMA or I/O failure
+	 */
+	private static JCas getJcas(File f) {
+		List<File> list = new ArrayList<File>();
+		list.add(f);
+		CollectionReader cr = null;
+		AnalysisEngine engine = null;
+		try {
+			cr = getCollectionReader(list);
+			// uimaFIT finds the available type systems on the classpath
+			TypeSystemDescription typeSystemDescription = TypeSystemDescriptionFactory.createTypeSystemDescription();
+			AnalysisEngineDescription noOp = AnalysisEngineFactory.createPrimitiveDescription(NoOpAnnotator.class, typeSystemDescription);
+			AggregateBuilder builder = new AggregateBuilder();
+			builder.add(noOp);
+			engine = builder.createAggregate();
+			List<ResourceMetaData> metaData = new ArrayList<ResourceMetaData>();
+			metaData.add(cr.getMetaData());
+			metaData.add(engine.getMetaData());
+			CAS cas = CasCreationUtils.createCas(metaData);
+			if (!cr.hasNext()) {
+				throw new IllegalStateException("No document could be read from " + f);
+			}
+			cr.getNext(cas); // assumes just one document to process
+			engine.process(cas);
+			engine.collectionProcessComplete();
+			return cas.getJCas();
+		} catch (UIMAException e) {
+			throw new RuntimeException(e);
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		} finally {
+			// Release the per-file engine and reader; the CAS was created from their metadata only
+			if (engine != null) engine.destroy();
+			if (cr != null) cr.destroy();
+		}
+	}
+
+	/**
+	 * Creates an XMIReader configured, through XMIReader.PARAM_FILES, with the paths of the given files.
+	 *
+	 * @param items files to read
+	 * @return a collection reader over the given files
+	 * @throws ResourceInitializationException if the reader cannot be created
+	 */
+	public static CollectionReader getCollectionReader(List<File> items) throws ResourceInitializationException {
+		String[] paths = new String[items.size()];
+		for (int i = 0; i < paths.length; ++i) {
+			paths[i] = items.get(i).getPath();
+		}
+		return CollectionReaderFactory.createCollectionReader(
+				XMIReader.class,
+				TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(),
+				XMIReader.PARAM_FILES,
+				paths);
+	}
+}
+ 
\ No newline at end of file

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain