You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2013/11/27 18:07:20 UTC
svn commit: r1546113 - /ctakes/sandbox/groovy/parser.groovy
Author: chenpei
Date: Wed Nov 27 17:07:19 2013
New Revision: 1546113
URL: http://svn.apache.org/r1546113
Log:
CTAKES-273 - Groovy Integration - Updated sample script for Tim's usecase.
Running this script will output the parse text to the console from the input directory.
Modified:
ctakes/sandbox/groovy/parser.groovy
Modified: ctakes/sandbox/groovy/parser.groovy
URL: http://svn.apache.org/viewvc/ctakes/sandbox/groovy/parser.groovy?rev=1546113&r1=1546112&r2=1546113&view=diff
==============================================================================
--- ctakes/sandbox/groovy/parser.groovy (original)
+++ ctakes/sandbox/groovy/parser.groovy Wed Nov 27 17:07:19 2013
@@ -1,4 +1,19 @@
#!/usr/bin/env groovy
+/**
+** This assumes that you have installed Groovy and
+** that you have the command groovy available in your path.
+** On Debian/Ubuntu systems, installing Groovy should be as easy as apt-get install groovy.
+** You can download groovy from http://groovy.codehaus.org/
+** The first run may be slow since it needs to download all of the dependencies.
+** Usage: $./parser.groovy [inputDir]
+** or enable more verbose status $groovy -Dgroovy.grape.report.downloads=true parser.groovy [inputDir]
+**/
+@Grab(group='org.apache.ctakes',
+ module='ctakes-core',
+ version='3.1.0')
+@Grab(group='org.apache.ctakes',
+ module='ctakes-core-res',
+ version='3.1.0')
@Grab(group='org.apache.ctakes',
module='ctakes-constituency-parser',
version='3.1.0')
@@ -6,15 +21,8 @@
module='ctakes-constituency-parser-res',
version='3.1.0')
@Grab(group='org.apache.ctakes',
- module='ctakes-pos-tagger',
- version='3.1.0')
-@Grab(group='org.apache.ctakes',
- module='ctakes-pos-tagger-res',
- version='3.1.0')
-@Grab(group='org.apache.ctakes',
module='ctakes-clinical-pipeline',
version='3.1.0')
-
import java.io.File;
import org.apache.uima.jcas.JCas;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -31,39 +39,60 @@ import static org.uimafit.util.JCasUtil.
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.ae.SentenceDetector;
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
-import org.apache.ctakes.postagger.POSTagger;
import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
CollectionReader collectionReader = FilesCollectionReader.getCollectionReader(args[0]);
+ if(args.length < 1) {
+ System.out.println("Please specify input directory");
+ System.exit(1);
+ }
System.out.println("Reading from directory: " + args[0]);
-
+
+ //Download Models
+ //TODO: Separate downloads from URL here is a hack.
+ //Models should really be automatically downloaded from
+ //maven central as part of ctakes-*-res projects/artifacts via @grab.
+ //Illustrative purposes until we have all of the *-res artifacts in maven central.
+ downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sentdetect/sd-med-model.zip","sd-med-model.zip");
+ downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin","sharpacq-3.1.bin");
+
+ //Build the pipeline to run
AggregateBuilder aggregateBuilder = new AggregateBuilder();
-
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
- aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-
- /* Need to resolve zip resoures from inside a jar first...
- aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
- POSTagger.class,
- TypeSystemDescriptionFactory.createTypeSystemDescription(),
- TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
- POSTagger.POS_MODEL_FILE_PARAM,
- "org/apache/ctakes/postagger/models/mayo-pos.zip"));
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
SentenceDetector.class,
SentenceDetector.SD_MODEL_FILE_PARAM,
- "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
- aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
- */
+ "sd-med-model.zip"));
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ ConstituencyParser.class,
+ ConstituencyParser.PARAM_MODELFILE,
+ "sharpacq-3.1.bin"));
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(Writer.class));
SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
// Custom writer class used at the end of the pipeline to write results to screen
class Writer extends org.uimafit.component.JCasAnnotator_ImplBase {
void process(JCas jcas) {
- select(jcas, Segment).each { println "${it.coveredText} begin:${it.begin} end:${it.end}" }
+ //Get each Treebanknode and print out the text and its parse string
+ select(jcas, TopTreebankNode).each { println "${it.treebankParse} " }
}
+}
+
+def downloadFile(String url, String filename) {
+ System.out.println("Downloading: " + url);
+ def file = new File(filename);
+ if(file.exists()) {
+ System.out.println("File already exists:" + filename);
+ return;
+ }
+ def f = new FileOutputStream(url.tokenize("/")[-1])
+ def out = new BufferedOutputStream(f)
+ out << new URL(url).openStream()
+ out.close()
}
\ No newline at end of file