You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2021/09/21 21:42:39 UTC

svn commit: r1893500 - in /ctakes/trunk/ctakes-relation-extractor: desc/analysis_engine/ src/main/java/org/apache/ctakes/relationextractor/pipelines/

Author: tmill
Date: Tue Sep 21 21:42:38 2021
New Revision: 1893500

URL: http://svn.apache.org/viewvc?rev=1893500&view=rev
Log:
Update relation extraction pipeline with modern UIMA library calls. Also update the aggregate descriptors to use the fast dictionary lookup.

Modified:
    ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml
    ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java

Modified: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml?rev=1893500&r1=1893499&r2=1893500&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml (original)
+++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml Tue Sep 21 21:42:38 2021
@@ -153,7 +153,7 @@
                         <import location="../../../ctakes-chunker/desc/Chunker.xml"/>
                     </delegateAnalysisEngine>
                     <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
-                        <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
+                        <import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>
                     </delegateAnalysisEngine>
                     <delegateAnalysisEngine key="LookupWindowAnnotator">
                         <import location="../../../ctakes-clinical-pipeline/desc/analysis_engine/LookupWindowAnnotator.xml"/>

Modified: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml?rev=1893500&r1=1893499&r2=1893500&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml (original)
+++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml Tue Sep 21 21:42:38 2021
@@ -60,8 +60,9 @@
       <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPDependencyParserAE.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
-      <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
+      <!--<import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>-->
        <!--<import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>-->
+      <import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="LookupWindowAnnotator">
       <import location="../../../ctakes-clinical-pipeline/desc/analysis_engine/LookupWindowAnnotator.xml"/>

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java?rev=1893500&r1=1893499&r2=1893500&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorPipeline.java Tue Sep 21 21:42:38 2021
@@ -19,19 +19,23 @@
 package org.apache.ctakes.relationextractor.pipelines;
 
 import org.apache.ctakes.core.config.ConfigParameterConstants;
+import org.apache.ctakes.core.cr.FileTreeReader;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
 import org.apache.uima.UIMAException;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.cas.SerialFormat;
 import org.apache.uima.collection.CollectionReaderDescription;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.CollectionReaderFactory;
 import org.apache.uima.fit.pipeline.SimplePipeline;
-import org.apache.uima.fit.util.CasIOUtil;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.util.CasIOUtils;
 import org.kohsuke.args4j.CmdLineException;
 import org.kohsuke.args4j.CmdLineParser;
 import org.kohsuke.args4j.Option;
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 
 /**
@@ -66,19 +70,20 @@ public class RelationExtractorPipeline {
 		CmdLineParser parser = new CmdLineParser(options);
 		parser.parseArgument(args);
 
-		CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescriptionFromPath(
-				"../ctakes-core/desc/collection_reader/FilesInDirectoryCollectionReader.xml",
-            ConfigParameterConstants.PARAM_INPUTDIR,
-            options.inputDirectory );
+		CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescription(
+				FileTreeReader.class,
+				ConfigParameterConstants.PARAM_INPUTDIR,
+				options.inputDirectory);
 
 		// make sure the model parameters match those used for training
 		AnalysisEngineDescription relationExtractor = AnalysisEngineFactory.createEngineDescriptionFromPath(
 				"desc/analysis_engine/RelationExtractorAggregate.xml");
     
-		int fileNum = 0;
 		for(JCas jcas : SimplePipeline.iteratePipeline(collectionReader, relationExtractor)){
-			CasIOUtil.writeXmi(jcas, new File(options.outputDirectory, String.format("%d.txt", fileNum++)));
+			String docId = DocIdUtil.getDocumentID(jcas);
+			try(FileOutputStream fos = new FileOutputStream(new File(options.outputDirectory, String.format("%s.xmi", docId)))) {
+				CasIOUtils.save(jcas.getCas(), fos, SerialFormat.XMI);
+			}
 		}
-    
 	}
 }