You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2013/11/27 18:07:20 UTC

svn commit: r1546113 - /ctakes/sandbox/groovy/parser.groovy

Author: chenpei
Date: Wed Nov 27 17:07:19 2013
New Revision: 1546113

URL: http://svn.apache.org/r1546113
Log:
CTAKES-273 - Groovy Integration - Updated sample script for Tim's usecase.
Running this script will output the parse text to the console for the files in the input directory.

Modified:
    ctakes/sandbox/groovy/parser.groovy

Modified: ctakes/sandbox/groovy/parser.groovy
URL: http://svn.apache.org/viewvc/ctakes/sandbox/groovy/parser.groovy?rev=1546113&r1=1546112&r2=1546113&view=diff
==============================================================================
--- ctakes/sandbox/groovy/parser.groovy (original)
+++ ctakes/sandbox/groovy/parser.groovy Wed Nov 27 17:07:19 2013
@@ -1,4 +1,19 @@
 #!/usr/bin/env groovy
+/**
+** 	This assumes that you have installed Groovy and 
+** 	that you have the command groovy available in your path. 
+** 	On Debian/Ubuntu systems, installing Groovy should be as easy as apt-get install groovy.
+** 	You can download groovy from http://groovy.codehaus.org/
+** 	The first run may be slow since it needs to download all of the dependencies.
+**  Usage: $./parser.groovy [inputDir]
+** 	or enable more verbose status $groovy -Dgroovy.grape.report.downloads=true parser.groovy [inputDir]
+**/
+@Grab(group='org.apache.ctakes',
+      module='ctakes-core',
+            version='3.1.0')
+@Grab(group='org.apache.ctakes',
+      module='ctakes-core-res',
+            version='3.1.0')			
 @Grab(group='org.apache.ctakes',
       module='ctakes-constituency-parser',
             version='3.1.0')
@@ -6,15 +21,8 @@
       module='ctakes-constituency-parser-res',
             version='3.1.0')		
 @Grab(group='org.apache.ctakes',
-      module='ctakes-pos-tagger',
-            version='3.1.0')	
-@Grab(group='org.apache.ctakes',
-      module='ctakes-pos-tagger-res',
-            version='3.1.0')			
-@Grab(group='org.apache.ctakes',
       module='ctakes-clinical-pipeline',
             version='3.1.0')
-			
 import java.io.File;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -31,39 +39,60 @@ import static org.uimafit.util.JCasUtil.
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.core.ae.SentenceDetector;
 import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
 import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
-import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
 
 		CollectionReader collectionReader = FilesCollectionReader.getCollectionReader(args[0]);
+		if(args.length < 1) {
+		System.out.println("Please specify input directory");
+		System.exit(1);
+		}
 		System.out.println("Reading from directory: " + args[0]);
-		
+
+		//Download Models
+		//TODO: Downloading the models separately from a URL here is a hack.
+		//Models should really be automatically downloaded from 
+		//maven central as part of ctakes-*-res projects/artifacts via @grab.
+		//Illustrative purposes until we have all of the *-res artifacts in maven central.
+		downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sentdetect/sd-med-model.zip","sd-med-model.zip");
+		downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin","sharpacq-3.1.bin");
+
+		//Build the pipeline to run
 		AggregateBuilder aggregateBuilder = new AggregateBuilder();
-		
 		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
-		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-		
-		/*  Need to resolve zip resoures from inside a jar first...
-		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-			POSTagger.class,
-			TypeSystemDescriptionFactory.createTypeSystemDescription(),
-			TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
-			POSTagger.POS_MODEL_FILE_PARAM,
-			"org/apache/ctakes/postagger/models/mayo-pos.zip"));		
 		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
             SentenceDetector.class,
             SentenceDetector.SD_MODEL_FILE_PARAM,
-            "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));			
-		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
-		*/
+            "sd-med-model.zip"));
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));			
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+			ConstituencyParser.class,
+			ConstituencyParser.PARAM_MODELFILE,
+            "sharpacq-3.1.bin"));
 		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(Writer.class));
 		SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
 
 // Custom writer class used at the end of the pipeline to write results to screen
 class Writer extends org.uimafit.component.JCasAnnotator_ImplBase {
   void process(JCas jcas) {
-    select(jcas, Segment).each { println "${it.coveredText} begin:${it.begin} end:${it.end}"  }
+	//Print the treebankParse string of every TopTreebankNode selected from the JCas
+    select(jcas, TopTreebankNode).each { println "${it.treebankParse} "  }
   }
+}
+
+def downloadFile(String url, String filename) {
+	// Fetches url into filename (relative to the working directory) unless that file already exists.
+	System.out.println("Downloading: " + url);
+	def file = new File(filename);
+	if(file.exists()) {
+	  System.out.println("File already exists:" + filename);
+	  return;
+	}
+	// Write to filename, not the URL's last path segment, so it matches the exists() check above.
+	// withOutputStream/withInputStream close both streams even if the transfer fails.
+	file.withOutputStream { out -> new URL(url).withInputStream { out << it } }
 }
\ No newline at end of file