You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2021/09/21 21:42:39 UTC
svn commit: r1893500 - in /ctakes/trunk/ctakes-relation-extractor:
desc/analysis_engine/
src/main/java/org/apache/ctakes/relationextractor/pipelines/
Author: tmill
Date: Tue Sep 21 21:42:38 2021
New Revision: 1893500
URL: http://svn.apache.org/viewvc?rev=1893500&view=rev
Log:
Update the relation extraction pipeline to use modern UIMA library calls. Also update the aggregate descriptors to use the fast dictionary lookup.
Modified:
ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml
ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java
Modified: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml?rev=1893500&r1=1893499&r2=1893500&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml (original)
+++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml Tue Sep 21 21:42:38 2021
@@ -153,7 +153,7 @@
<import location="../../../ctakes-chunker/desc/Chunker.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
- <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
+ <import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="LookupWindowAnnotator">
<import location="../../../ctakes-clinical-pipeline/desc/analysis_engine/LookupWindowAnnotator.xml"/>
Modified: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml?rev=1893500&r1=1893499&r2=1893500&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml (original)
+++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml Tue Sep 21 21:42:38 2021
@@ -60,8 +60,9 @@
<import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPDependencyParserAE.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
- <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
+ <!--<import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>-->
<!--<import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>-->
+ <import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="LookupWindowAnnotator">
<import location="../../../ctakes-clinical-pipeline/desc/analysis_engine/LookupWindowAnnotator.xml"/>
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java?rev=1893500&r1=1893499&r2=1893500&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java Tue Sep 21 21:42:38 2021
@@ -19,19 +19,23 @@
package org.apache.ctakes.relationextractor.pipelines;
import org.apache.ctakes.core.config.ConfigParameterConstants;
+import org.apache.ctakes.core.cr.FileTreeReader;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.cas.SerialFormat;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
-import org.apache.uima.fit.util.CasIOUtil;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.util.CasIOUtils;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
/**
@@ -66,19 +70,20 @@ public class RelationExtractorPipeline {
CmdLineParser parser = new CmdLineParser(options);
parser.parseArgument(args);
- CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescriptionFromPath(
- "../ctakes-core/desc/collection_reader/FilesInDirectoryCollectionReader.xml",
- ConfigParameterConstants.PARAM_INPUTDIR,
- options.inputDirectory );
+ CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescription(
+ FileTreeReader.class,
+ ConfigParameterConstants.PARAM_INPUTDIR,
+ options.inputDirectory);
// make sure the model parameters match those used for training
AnalysisEngineDescription relationExtractor = AnalysisEngineFactory.createEngineDescriptionFromPath(
"desc/analysis_engine/RelationExtractorAggregate.xml");
- int fileNum = 0;
for(JCas jcas : SimplePipeline.iteratePipeline(collectionReader, relationExtractor)){
- CasIOUtil.writeXmi(jcas, new File(options.outputDirectory, String.format("%d.txt", fileNum++)));
+ String docId = DocIdUtil.getDocumentID(jcas);
+ try(FileOutputStream fos = new FileOutputStream(new File(options.outputDirectory, String.format("%s.xmi", docId)))) {
+ CasIOUtils.save(jcas.getCas(), fos, SerialFormat.XMI);
+ }
}
-
}
}