You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/05/15 15:14:22 UTC
svn commit: r1482808 - in
/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference:
ae/ ae/features/ eval/
Author: tmill
Date: Wed May 15 13:14:21 2013
New Revision: 1482808
URL: http://svn.apache.org/r1482808
Log:
Added descriptor methods to coref annotator. Added some eval boilerplate to eval code, not correct but it compiles so checking in to sync up.
Modified:
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java?rev=1482808&r1=1482807&r2=1482808&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java Wed May 15 13:14:21 2013
@@ -1,5 +1,6 @@
package org.apache.ctakes.coreference.ae;
+import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@@ -12,15 +13,46 @@ import org.apache.ctakes.coreference.uti
import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.jcas.tcas.DocumentAnnotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.util.JCasUtil;
public class NamedEntityCoreferenceResolver extends RelationExtractorAnnotator {
+ /**
+  * Builds the description of this annotator for training mode: {@code PARAM_IS_TRAINING} is
+  * {@code true} and the data writer class and output directory are passed to the data writer
+  * factory parameters.
+  *
+  * @param dataWriterClass value for {@code DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME}
+  * @param outputDirectory value for {@code DefaultDataWriterFactory.PARAM_OUTPUT_DIRECTORY}
+  * @return the configured analysis engine description
+  * @throws ResourceInitializationException if the description cannot be created
+  */
+ public static AnalysisEngineDescription createDataWriterDescription(
+     Class<? extends DataWriter<String>> dataWriterClass,
+     File outputDirectory) throws ResourceInitializationException {
+   // configuration is given as name/value pairs, one pair per line
+   return AnalysisEngineFactory.createPrimitiveDescription(
+       NamedEntityCoreferenceResolver.class,
+       CleartkAnnotator.PARAM_IS_TRAINING, true,
+       DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, dataWriterClass,
+       DefaultDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory);
+ }
+
+ /**
+  * Builds the description of this annotator for classification mode: {@code PARAM_IS_TRAINING}
+  * is {@code false} and the classifier is read from {@code model.jar} inside the model directory.
+  *
+  * @param modelDirectory directory that contains {@code model.jar}
+  * @return the configured analysis engine description
+  * @throws ResourceInitializationException if the description cannot be created
+  */
+ public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+     throws ResourceInitializationException {
+   File classifierJar = new File(modelDirectory, "model.jar");
+   return AnalysisEngineFactory.createPrimitiveDescription(
+       NamedEntityCoreferenceResolver.class,
+       CleartkAnnotator.PARAM_IS_TRAINING, false,
+       GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, classifierJar);
+ }
+
@Override
protected List<RelationFeaturesExtractor> getFeatureExtractors() {
List<RelationFeaturesExtractor> extractors = new ArrayList<RelationFeaturesExtractor>();
@@ -66,11 +98,15 @@ public class NamedEntityCoreferenceResol
}
private List<IdentifiedAnnotation> getDocumentMarkables(JCas jcas, Annotation coveringAnnotation) {
- Collection<EntityMention> mentions = (JCasUtil.select(jcas, EntityMention.class));
+ List<IdentifiedAnnotation> mentions = new ArrayList<IdentifiedAnnotation>();
+ Collection<EntityMention> entityMentions = (JCasUtil.select(jcas, EntityMention.class));
+ Collection<EventMention> eventMentions = JCasUtil.select(jcas, EventMention.class);
+ mentions.addAll(entityMentions);
+ mentions.addAll(eventMentions);
// expandToNP(mentions);
// mergeNP(mentions);
// elevateAdjectives(mentions);
- return new ArrayList<IdentifiedAnnotation>(mentions);
+ return mentions;
}
@Override
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java?rev=1482808&r1=1482807&r2=1482808&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java Wed May 15 13:14:21 2013
@@ -1,7 +1,6 @@
package org.apache.ctakes.coreference.ae.features;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java?rev=1482808&r1=1482807&r2=1482808&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java Wed May 15 13:14:21 2013
@@ -1,7 +1,6 @@
package org.apache.ctakes.coreference.ae.features;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.List;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java?rev=1482808&r1=1482807&r2=1482808&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java Wed May 15 13:14:21 2013
@@ -1,23 +1,75 @@
package org.apache.ctakes.coreference.eval;
import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
import java.util.List;
+import org.apache.ctakes.chunker.ae.Chunker;
+import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
+import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
+import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
+import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
+import org.apache.ctakes.core.ae.OverlapAnnotator;
+import org.apache.ctakes.core.ae.SentenceDetector;
+import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
+import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.resource.FileResourceImpl;
+import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
+import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
+import org.apache.ctakes.core.resource.SuffixMaxentModelResourceImpl;
+import org.apache.ctakes.coreference.ae.NamedEntityCoreferenceResolver;
+import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
+import org.apache.ctakes.lvg.ae.LvgAnnotator;
+import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
+import org.apache.ctakes.postagger.POSTagger;
import org.apache.ctakes.relationextractor.eval.XMIReader;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UIMAException;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XmiCasSerializer;
import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.XMLSerializer;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
import org.cleartk.eval.AnnotationStatistics;
import org.cleartk.eval.Evaluation_ImplBase;
+import org.cleartk.util.ViewURIUtil;
+import org.cleartk.util.ae.UriToDocumentTextAnnotator;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.component.ViewCreatorAnnotator;
+import org.uimafit.component.ViewTextCopierAnnotator;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.ExternalResourceFactory;
+import org.uimafit.factory.TypePrioritiesFactory;
import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.testing.util.HideOutput;
+import org.uimafit.util.JCasUtil;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
public class EvaluationOfCoreferencePairs extends
Evaluation_ImplBase<File, AnnotationStatistics<String>> {
public static final String GOLD_VIEW_NAME = "GOLD_VIEW";
-
+ private boolean xmiExists = false;
+ private File xmiDirectory = null;
+
public EvaluationOfCoreferencePairs(File baseDirectory) { // creates an evaluation rooted at baseDirectory
super(baseDirectory); // the directory is handed unchanged to the Evaluation_ImplBase constructor
- // TODO Auto-generated constructor stub
}
@Override
@@ -34,14 +86,25 @@ public class EvaluationOfCoreferencePair
XMIReader.class,
TypeSystemDescriptionFactory.createTypeSystemDescription(),
XMIReader.PARAM_FILES,
- paths);
+ items);
}
@Override
protected void train(CollectionReader collectionReader, File directory)
throws Exception {
- // TODO Auto-generated method stub
-
+ AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+
+ aggregateBuilder.add(NamedEntityCoreferenceResolver.createAnnotatorDescription(directory));
+ SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+
+ HideOutput hider = new HideOutput();
+ // libsvm:
+ JarClassifierBuilder.trainAndPackage(directory, "-t", "0", "-c", "10");
+ // tksvmlight with no tk features:
+// JarClassifierBuilder.trainAndPackage(directory, "-t", "0", "-c", "10", "-N", "0");
+// JarClassifierBuilder.trainAndPackage(directory, "-t", "5", "-S", "0", "-N", "3", "-C", "+", "-T", "1.0");
+ hider.restoreOutput();
+ hider.close();
}
@Override
@@ -51,6 +114,307 @@ public class EvaluationOfCoreferencePair
return null;
}
+ /**
+  * Picks the preprocessing pipeline: the XMI-reading builder when {@code xmiExists} is true,
+  * otherwise the builder that runs the full preprocessing and writes XMI files.
+  *
+  * @return the selected aggregate builder
+  * @throws Exception if the selected builder cannot be created
+  */
+ protected AggregateBuilder getPreprocessorAggregateBuilder() throws Exception {
+   if (this.xmiExists) {
+     return this.getXMIReadingPreprocessorAggregateBuilder();
+   }
+   return this.getXMIWritingPreprocessorAggregateBuilder();
+ }
+
+ /**
+  * Builds the lightweight preprocessing pipeline. At present it only contains the
+  * URI-to-document-text annotator; the XMI reading step is still commented out.
+  *
+  * @return an aggregate builder holding the steps described above
+  * @throws UIMAException if a component description cannot be created
+  */
+ protected AggregateBuilder getXMIReadingPreprocessorAggregateBuilder() throws UIMAException {
+   AggregateBuilder builder = new AggregateBuilder();
+   builder.add(UriToDocumentTextAnnotator.getDescription());
+   // disabled XMI reading step:
+// builder.add(AnalysisEngineFactory.createPrimitiveDescription(
+// XMIReader.class,
+// XMIReader.PARAM_XMI_DIRECTORY,
+// this.xmiDirectory));
+   return builder;
+ }
+
+ protected AggregateBuilder getXMIWritingPreprocessorAggregateBuilder()
+ throws Exception { // builds the full preprocessing pipeline, ending with an XMIWriter step
+ AggregateBuilder aggregateBuilder = new AggregateBuilder();
+ aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
+
+ // create the gold view and copy the document text into it (the manual annotation reader below is commented out)
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ ViewCreatorAnnotator.class,
+ ViewCreatorAnnotator.PARAM_VIEW_NAME,
+ GOLD_VIEW_NAME));
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ ViewTextCopierAnnotator.class,
+ ViewTextCopierAnnotator.PARAM_SOURCE_VIEW_NAME,
+ CAS.NAME_DEFAULT_SOFA,
+ ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME,
+ GOLD_VIEW_NAME));
+// aggregateBuilder.add(
+// THYMEKnowtatorXMLReader.getDescription(this.knowtatorXMLDirectory),
+// CAS.NAME_DEFAULT_SOFA,
+// GOLD_VIEW_NAME);
+
+ // identify segments
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
+ // identify sentences (the maxent model file is located through FileLocator)
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ SentenceDetector.class,
+ "MaxentModel",
+ ExternalResourceFactory.createExternalResourceDescription(
+ SuffixMaxentModelResourceImpl.class,
+ FileLocator.locateFile("org/apache/ctakes/core/sentdetect/sdmed.mod").toURI().toURL())));
+ // identify tokens
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
+ // merge some tokens
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
+
+ // identify part-of-speech tags (type priorities: Segment, Sentence, BaseToken)
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ POSTagger.class,
+ TypeSystemDescriptionFactory.createTypeSystemDescription(),
+ TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
+ POSTagger.POS_MODEL_FILE_PARAM,
+ "org/apache/ctakes/postagger/models/mayo-pos.zip",
+ POSTagger.TAG_DICTIONARY_PARAM,
+ "org/apache/ctakes/postagger/models/tag.dictionary.txt",
+ POSTagger.CASE_SENSITIVE_PARAM,
+ true));
+
+ // identify chunks
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ Chunker.class,
+ Chunker.CHUNKER_MODEL_FILE_PARAM,
+ FileLocator.locateFile("org/apache/ctakes/chunker/models/chunk-model.claims-1.5.zip"),
+ Chunker.CHUNKER_CREATOR_CLASS_PARAM,
+ DefaultChunkCreator.class));
+
+ // identify UMLS named entities: adjust chunks, build lookup windows, then run the dictionary lookup
+
+ // adjust NP in NP NP to span both
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 1));
+ // adjust NP in NP PP NP to span all three
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "PP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 2));
+ // add lookup windows for each NP
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
+ // maximize lookup windows: DELETE with selector=B on A_ENV_B overlaps (presumably drops windows enveloped by another -- confirm)
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ OverlapAnnotator.class,
+ "A_ObjectClass",
+ LookupWindowAnnotation.class,
+ "B_ObjectClass",
+ LookupWindowAnnotation.class,
+ "OverlapType",
+ "A_ENV_B",
+ "ActionType",
+ "DELETE",
+ "DeleteAction",
+ new String[] { "selector=B" }));
+ // add UMLS on top of lookup windows (dictionary resources are read from target/unpacked, relative to the working directory)
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ UmlsDictionaryLookupAnnotator.class,
+ "ctakes.umlsaddr",
+ "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser",
+ "ctakes.umlsvendor",
+ "NLM-6515182895",
+ "LookupDescriptor",
+ ExternalResourceFactory.createExternalResourceDescription(
+ FileResourceImpl.class,
+ new File("target/unpacked/org/apache/ctakes/dictionary/lookup/LookupDesc_Db.xml").getAbsoluteFile()),
+ "DbConnection",
+ ExternalResourceFactory.createExternalResourceDescription(
+ JdbcConnectionResourceImpl.class,
+ "",
+ JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS,
+ "org.hsqldb.jdbcDriver",
+ JdbcConnectionResourceImpl.PARAM_URL,
+ // Should be the following (classpath resource URL) but it's WAY too slow
+ // "jdbc:hsqldb:res:/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
+ "jdbc:hsqldb:file:target/unpacked/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
+ "RxnormIndexReader",
+ ExternalResourceFactory.createExternalResourceDescription(
+ LuceneIndexReaderResourceImpl.class,
+ "",
+ "UseMemoryIndex",
+ true,
+ "IndexDirectory",
+ new File("target/unpacked/org/apache/ctakes/dictionary/lookup/rxnorm_index").getAbsoluteFile()),
+ "OrangeBookIndexReader",
+ ExternalResourceFactory.createExternalResourceDescription(
+ LuceneIndexReaderResourceImpl.class,
+ "",
+ "UseMemoryIndex",
+ true,
+ "IndexDirectory",
+ new File("target/unpacked/org/apache/ctakes/dictionary/lookup/OrangeBook").getAbsoluteFile())));
+
+ // add lvg annotator (both UseCmdCache and UseLemmaCache are set to false below)
+ String[] XeroxTreebankMap = {
+ "adj|JJ",
+ "adv|RB",
+ "aux|AUX",
+ "compl|CS",
+ "conj|CC",
+ "det|DET",
+ "modal|MD",
+ "noun|NN",
+ "prep|IN",
+ "pron|PRP",
+ "verb|VB" };
+ String[] ExclusionSet = {
+ "and",
+ "And",
+ "by",
+ "By",
+ "for",
+ "For",
+ "in",
+ "In",
+ "of",
+ "Of",
+ "on",
+ "On",
+ "the",
+ "The",
+ "to",
+ "To",
+ "with",
+ "With" };
+ AnalysisEngineDescription lvgAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
+ LvgAnnotator.class,
+ "UseSegments",
+ false,
+ "SegmentsToSkip",
+ new String[0],
+ "UseCmdCache",
+ false,
+ "CmdCacheFileLocation",
+ "/org/apache/ctakes/lvg/2005_norm.voc",
+ "CmdCacheFrequencyCutoff",
+ 20,
+ "ExclusionSet",
+ ExclusionSet,
+ "XeroxTreebankMap",
+ XeroxTreebankMap,
+ "LemmaCacheFileLocation",
+ "/org/apache/ctakes/lvg/2005_lemma.voc",
+ "UseLemmaCache",
+ false,
+ "LemmaCacheFrequencyCutoff",
+ 20,
+ "PostLemmas",
+ true,
+ "LvgCmdApi",
+ ExternalResourceFactory.createExternalResourceDescription(
+ LvgCmdApiResourceImpl.class,
+ new File(LvgCmdApiResourceImpl.class.getResource(
+ "/org/apache/ctakes/lvg/data/config/lvg.properties").toURI())));
+ aggregateBuilder.add(lvgAnnotator);
+
+ // add dependency parser (currently disabled)
+// aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
+
+ // add semantic role labeler (currently disabled)
+// aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
+
+ // add constituency parser
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
+
+ // write out the CAS as XMI after all the above annotations
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ XMIWriter.class,
+ XMIWriter.PARAM_XMI_DIRECTORY,
+ this.xmiDirectory)); // NOTE(review): xmiDirectory starts as null and no assignment is visible in this diff -- confirm it is set
+
+ return aggregateBuilder;
+ }
+
+ /**
+  * Annotator that serializes every CAS it processes to an XMI file inside a configured directory.
+  */
+ public static class XMIWriter extends JCasAnnotator_ImplBase {
+
+   public static final String PARAM_XMI_DIRECTORY = "XMIDirectory";
+
+   @ConfigurationParameter(name = PARAM_XMI_DIRECTORY, mandatory = true)
+   private File xmiDirectory;
+
+   /**
+    * Makes sure the output directory exists, creating it and any missing parents when needed.
+    *
+    * @throws ResourceInitializationException if the directory is missing and cannot be created,
+    *         or if the path exists but is not a directory
+    */
+   @Override
+   public void initialize(UimaContext context) throws ResourceInitializationException {
+     super.initialize(context);
+     // mkdirs() also returns false when something else created the directory in the meantime,
+     // so isDirectory() is checked again before the failure is reported.
+     if (!this.xmiDirectory.isDirectory()
+         && !this.xmiDirectory.mkdirs()
+         && !this.xmiDirectory.isDirectory()) {
+       throw new ResourceInitializationException(
+           new IOException("Cannot create XMI directory: " + this.xmiDirectory));
+     }
+   }
+
+   /**
+    * Serializes the CAS to the file that {@code getXMIFile} derives from the output directory
+    * and the CAS. The output stream is closed whether or not serialization succeeds.
+    *
+    * @throws AnalysisEngineProcessException wrapping any SAXException or IOException raised
+    *         while opening, writing or closing the file
+    */
+   @Override
+   public void process(JCas jCas) throws AnalysisEngineProcessException {
+     File xmiFile = getXMIFile(this.xmiDirectory, jCas);
+     try {
+       FileOutputStream outputStream = new FileOutputStream(xmiFile);
+       try {
+         XmiCasSerializer serializer = new XmiCasSerializer(jCas.getTypeSystem());
+         ContentHandler handler = new XMLSerializer(outputStream, false).getContentHandler();
+         serializer.serialize(jCas.getCas(), handler);
+       } finally {
+         outputStream.close();
+       }
+     } catch (SAXException e) {
+       throw new AnalysisEngineProcessException(e);
+     } catch (IOException e) {
+       throw new AnalysisEngineProcessException(e);
+     }
+   }
+ }
+/*
+ public static class XMIReader extends JCasAnnotator_ImplBase {
+
+ public static final String PARAM_XMI_DIRECTORY = "XMIDirectory";
+
+ @ConfigurationParameter(name = PARAM_XMI_DIRECTORY, mandatory = true)
+ private File xmiDirectory;
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ File xmiFile = getXMIFile(this.xmiDirectory, jCas);
+ try {
+ FileInputStream inputStream = new FileInputStream(xmiFile);
+ try {
+ XmiCasDeserializer.deserialize(inputStream, jCas.getCas());
+ } finally {
+ inputStream.close();
+ }
+ } catch (SAXException e) {
+ throw new AnalysisEngineProcessException(e);
+ } catch (IOException e) {
+ throw new AnalysisEngineProcessException(e);
+ }
+ }
+ }
+ */
+ /**
+  * Returns the XMI file for a text file: a file in {@code xmiDirectory} named after the text
+  * file's own name with {@code ".xmi"} appended.
+  */
+ static File getXMIFile(File xmiDirectory, File textFile) {
+   String xmiName = textFile.getName() + ".xmi";
+   return new File(xmiDirectory, xmiName);
+ }
+
+ /**
+  * Returns the XMI file for a CAS, derived from the path component of the URI that
+  * {@code ViewURIUtil.getURI} reports for it.
+  */
+ static File getXMIFile(File xmiDirectory, JCas jCas) throws AnalysisEngineProcessException {
+   String documentPath = ViewURIUtil.getURI(jCas).getPath();
+   return getXMIFile(xmiDirectory, new File(documentPath));
+ }
+
+ /**
+  * Annotator that adds a {@code LookupWindowAnnotation} over the span of every chunk whose
+  * chunk type is {@code "NP"}.
+  */
+ public static class CopyNPChunksToLookupWindowAnnotations extends JCasAnnotator_ImplBase {
+
+   @Override
+   public void process(JCas jCas) throws AnalysisEngineProcessException {
+     for (Chunk chunk : JCasUtil.select(jCas, Chunk.class)) {
+       // constant-first comparison: a chunk without a type is skipped instead of
+       // raising a NullPointerException
+       if ("NP".equals(chunk.getChunkType())) {
+         new LookupWindowAnnotation(jCas, chunk.getBegin(), chunk.getEnd()).addToIndexes();
+       }
+     }
+   }
+ }
+
/**
* @param args
*/