You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2013/05/20 17:34:30 UTC
svn commit: r1484500 -
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java
Author: james-masanz
Date: Mon May 20 15:34:30 2013
New Revision: 1484500
URL: http://svn.apache.org/r1484500
Log:
annotator for reading XMI containing gold standard annotations and copying them into a new view of the current CAS
Added:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java (with props)
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java?rev=1484500&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java Mon May 20 15:34:30 2013
@@ -0,0 +1,271 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.ResourceMetaData;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.util.CasCreationUtils;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.component.NoOpAnnotator;
+import org.uimafit.component.ViewCreatorAnnotator;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.util.JCasUtil;
+
+/**
+ *
+ * Read in gold annotations from XMI and create a view within the current CAS, and copy the
+ * gold annotations into the new view within the current CAS.
+ * Written to handle testing the cTAKES 2.5 assertion (polarity) value against the
+ * gold standard, using XMI that had already been created by the Apache cTAKES 3.0 gold standard reader.
+ *
+ */
+public class MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas extends JCasAnnotator_ImplBase {
+
+ static final Logger LOGGER = Logger.getLogger(MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.class.getName()); // log4j logger named after this class; used by the static helpers below
+
+
+ private static final String dirWithGoldViews = "/SHARP-data/assertion/UMLS_CEM-gold-combined"; // TODO parameterize this: hard-coded absolute directory that is searched for the serialized CAS holding the gold annotations
+ private static final File goldViewDir = new File(dirWithGoldViews); // the same directory as a File; used as the parent when a document ID is resolved to a file
+
+ @Override
+ public void initialize(UimaContext context) throws ResourceInitializationException {
+ super.initialize(context); // no annotator-specific setup here; only the base-class initialization runs
+ }
+
+ /**
+  * Copies the gold-standard mentions for the current document into the gold view of the current CAS.
+  * <p>
+  * The document ID of {@code jCas} is used to load the separately stored CAS that holds the
+  * gold annotations. Every {@link EventMention} and {@link EntityMention} found in that CAS is
+  * copied into the view named {@link AssertionEvaluation#GOLD_VIEW_NAME} of {@code jCas}; all
+  * other annotations are skipped. The gold view is given the same text as {@code jCas} unless it
+  * already carries text.
+  *
+  * @param jCas the CAS being processed; it receives the gold view and the copied annotations
+  * @throws AnalysisEngineProcessException declared by the UIMA annotator contract
+  * @throws RuntimeException if the gold CAS for the document, or its gold annotations, cannot be found
+  */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+     String docId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+     JCas correspondingCasThatHasGoldAnnotations = getCorrespondingCasThatHasGoldAnnotations(docId);
+     JCas viewWithPreexistingGoldAnnotations = getViewWithGoldAnnotations(correspondingCasThatHasGoldAnnotations);
+
+     // Validate the source before touching the current CAS, so a failure leaves it unmodified.
+     if (viewWithPreexistingGoldAnnotations == null) {
+         throw new RuntimeException("viewWithPreexistingGoldAnnotations is null");
+     }
+
+     // Create the new view in the current CAS so the current CAS has both views.
+     JCas newGoldView = ViewCreatorAnnotator.createViewSafely(jCas, AssertionEvaluation.GOLD_VIEW_NAME);
+     // createViewSafely may hand back a view that already exists; sofa data can only be set once,
+     // so only set it when the view does not have any text yet.
+     if (newGoldView.getSofaDataString() == null) {
+         newGoldView.setSofaDataString(jCas.getSofaDataString(), jCas.getSofaMimeType());
+     }
+
+     Collection<? extends Annotation> annotations = JCasUtil.select(viewWithPreexistingGoldAnnotations, Annotation.class);
+     LOGGER.debug("Found " + annotations.size() + " annotations.");
+
+     int countCopied = 0;
+     int countSkipped = 0;
+     for (Annotation a : annotations) {
+         if (isInstanceOfOneOfClassesToCopy(a)) {
+             copyAnnotation(a, newGoldView);
+             countCopied++;
+         } else {
+             countSkipped++;
+         }
+     }
+
+     LOGGER.debug("Copied " + countCopied + " gold annotations out of " + (countSkipped+countCopied) + " to cas, which now has " + newGoldView.getAnnotationIndex().size() + " indexed annotations in " +newGoldView.getViewName());
+     LOGGER.debug(" and has " + jCas.getAnnotationIndex().size() + " indexed annotations in " +jCas.getViewName());
+ }
+
+ /**
+  * Finds the view that holds the gold annotations inside the CAS that was loaded for a document.
+  * <p>
+  * The view named {@link AssertionEvaluation#GOLD_VIEW_NAME} is preferred. If that view cannot be
+  * retrieved, the given JCas itself is used instead, provided it has at least one indexed annotation.
+  *
+  * @param correspondingCasThatHasGoldAnnotations the CAS loaded for the document; must not be null
+  * @return the view to read gold annotations from; never null
+  * @throws NullPointerException if {@code correspondingCasThatHasGoldAnnotations} is null
+  * @throws RuntimeException if no gold view exists and the fallback view has no indexed annotations
+  */
+ private static JCas getViewWithGoldAnnotations(JCas correspondingCasThatHasGoldAnnotations) {
+     // Fail with a clear message instead of catching the NullPointerException and tripping over the same null later.
+     if (correspondingCasThatHasGoldAnnotations == null) {
+         throw new NullPointerException("correspondingCasThatHasGoldAnnotations is null");
+     }
+
+     JCas viewWithPreexistingGoldAnnotations = null;
+     try {
+         viewWithPreexistingGoldAnnotations = correspondingCasThatHasGoldAnnotations.getView(AssertionEvaluation.GOLD_VIEW_NAME);
+     } catch (org.apache.uima.cas.CASRuntimeException cre) {
+         // A missing gold view is tolerated; the fallback below is used instead.
+         LOGGER.debug("Could not get view " + AssertionEvaluation.GOLD_VIEW_NAME + ", falling back to the given view", cre);
+     } catch (org.apache.uima.cas.CASException viewException) {
+         // A missing gold view is tolerated; the fallback below is used instead.
+         LOGGER.debug("Could not get view " + AssertionEvaluation.GOLD_VIEW_NAME + ", falling back to the given view", viewException);
+     }
+     if (viewWithPreexistingGoldAnnotations != null) {
+         return viewWithPreexistingGoldAnnotations;
+     }
+
+     viewWithPreexistingGoldAnnotations = correspondingCasThatHasGoldAnnotations;
+     LOGGER.debug("Using view " + viewWithPreexistingGoldAnnotations.getViewName());
+     int n = viewWithPreexistingGoldAnnotations.getAnnotationIndex().size();
+     LOGGER.debug("With " + n + " annotations");
+     if (n == 0) {
+         // Log what every view contains so the reason for the failure can be diagnosed.
+         Iterator<CAS> iter = viewWithPreexistingGoldAnnotations.getCas().getViewIterator();
+         while (iter.hasNext()) {
+             CAS cas = iter.next();
+             LOGGER.debug("view " + cas.getViewName() + " has " + cas.getAnnotationIndex().size() + " indexed annotations.");
+         }
+         throw new RuntimeException("No view named " + AssertionEvaluation.GOLD_VIEW_NAME + " was found and view "
+                 + viewWithPreexistingGoldAnnotations.getViewName() + " has no indexed annotations (n==0)");
+     }
+     return viewWithPreexistingGoldAnnotations;
+ }
+
+ /**
+  * Creates a new {@link IdentifiedAnnotation} in the given view that mirrors a gold annotation.
+  * The assertion-related attributes, the segment/sentence/type identifiers and the begin/end
+  * offsets are copied, and the new annotation is added to the indexes of the target view.
+  *
+  * @param goldAnnotation the annotation to copy; must be an {@link IdentifiedAnnotation}
+  * @param jcas the view in which the copy is created and indexed
+  * @throws RuntimeException if {@code goldAnnotation} is not an {@link IdentifiedAnnotation}
+  */
+ private static void copyAnnotation(Annotation goldAnnotation, JCas jcas) {
+     if (!(goldAnnotation instanceof IdentifiedAnnotation)) {
+         throw new RuntimeException("Unexpected class of object " + goldAnnotation.getClass());
+     }
+
+     // Cast once and read every attribute from the typed source.
+     IdentifiedAnnotation source = (IdentifiedAnnotation) goldAnnotation;
+     IdentifiedAnnotation copy = new IdentifiedAnnotation(jcas);
+     copy.setConditional(source.getConditional());
+     copy.setConfidence(source.getConfidence());
+     copy.setDiscoveryTechnique(source.getDiscoveryTechnique());
+     copy.setGeneric(source.getGeneric());
+     copy.setHistoryOf(source.getHistoryOf());
+     copy.setPolarity(source.getPolarity());
+     copy.setSegmentID(source.getSegmentID());
+     copy.setSentenceID(source.getSentenceID());
+     copy.setSubject(source.getSubject());
+     copy.setTypeID(source.getTypeID());
+     copy.setUncertainty(source.getUncertainty());
+
+     copy.setBegin(goldAnnotation.getBegin());
+     copy.setEnd(goldAnnotation.getEnd());
+     copy.addToIndexes();
+ }
+
+ /**
+  * Tells whether an annotation is one of the mention types that get copied into the gold view.
+  *
+  * @param a the annotation to test
+  * @return {@code true} if {@code a} is an {@link EventMention} or an {@link EntityMention}
+  */
+ private static boolean isInstanceOfOneOfClassesToCopy(Annotation a) {
+     return a instanceof EventMention || a instanceof EntityMention;
+ }
+
+
+ private static JCas getCorrespondingCasThatHasGoldAnnotations(String docId) {
+ File f = new File(goldViewDir, docId);
+ if (!f.exists()) f = new File(goldViewDir, docId+".xml");
+ if (!f.exists()) f = new File(goldViewDir, docId+".xcas");
+ if (!f.exists()) f = new File(goldViewDir, docId+".xmi");
+ if (!f.exists()) f = new File(goldViewDir, docId+".xcas.xml");
+ if (!f.exists()) f = new File(goldViewDir, docId+".xmi.xml");
+
+ if (!f.exists())
+ try {
+ throw new RuntimeException("Unable to find file for doc ID " + docId + " in " + goldViewDir.getName() + " aka " + goldViewDir.getCanonicalPath());
+ } catch (IOException e) {
+ throw new RuntimeException("Unable to find file for doc ID " + docId + " in " + goldViewDir.getName());
+ }
+ return getJcas(f);
+ }
+
+ /**
+  * Reads a single serialized CAS file into a freshly created CAS.
+  * <p>
+  * A collection reader is created for the one file and paired with a no-op analysis engine whose
+  * type system is discovered by uimaFIT. The first (and assumed only) document of the reader is
+  * loaded into the new CAS. The reader and the engine are destroyed before returning, because this
+  * method is called once per processed document and would otherwise leak them.
+  *
+  * @param f the file to read
+  * @return the JCas of the CAS the file was read into; it is left empty if the reader has no document
+  * @throws RuntimeException wrapping any {@link UIMAException} or {@link IOException} raised while
+  *         building the components or reading the file
+  */
+ private static JCas getJcas(File f) {
+     List<File> list = new ArrayList<File>();
+     list.add(f);
+
+     CollectionReader cr = null;
+     AnalysisEngine engine = null;
+     try {
+         cr = getCollectionReader(list);
+
+         // uimafit find available type systems on classpath
+         TypeSystemDescription typeSystemDescription = TypeSystemDescriptionFactory.createTypeSystemDescription();
+         AnalysisEngineDescription noOp = AnalysisEngineFactory.createPrimitiveDescription(NoOpAnnotator.class, typeSystemDescription);
+         AggregateBuilder builder = new AggregateBuilder();
+         builder.add(noOp);
+         engine = builder.createAggregate();
+
+         // The CAS must know the type systems of both the reader and the engine.
+         List<ResourceMetaData> metaData = new ArrayList<ResourceMetaData>();
+         metaData.add(cr.getMetaData());
+         metaData.add(engine.getMetaData());
+         CAS cas = CasCreationUtils.createCas(metaData);
+
+         if (cr.hasNext()) { // assumes just one document to process
+             cr.getNext(cas);
+             engine.process(cas);
+         } else {
+             LOGGER.warn("Collection reader produced no document for " + f.getPath());
+         }
+
+         engine.collectionProcessComplete();
+
+         return cas.getJCas();
+     } catch (UIMAException e) {
+         // Also covers ResourceInitializationException, which is a UIMAException.
+         throw new RuntimeException(e);
+     } catch (IOException e) {
+         throw new RuntimeException(e);
+     } finally {
+         // The returned CAS was created independently of these components, so it stays usable.
+         if (engine != null) {
+             engine.destroy();
+         }
+         if (cr != null) {
+             cr.destroy();
+         }
+     }
+ }
+
+ public static CollectionReader getCollectionReader(List<File> items) throws ResourceInitializationException {
+ String[] paths = new String[items.size()];
+ for (int i = 0; i < paths.length; ++i) {
+ paths[i] = items.get(i).getPath();
+ }
+ return CollectionReaderFactory.createCollectionReader(
+ XMIReader.class,
+ TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(),
+ XMIReader.PARAM_FILES,
+ paths);
+ }
+
+}
+
\ No newline at end of file
Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.java
------------------------------------------------------------------------------
svn:mime-type = text/plain