You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/05/15 21:16:54 UTC

svn commit: r1483018 - in /ctakes/trunk: ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/ ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/

Author: tmill
Date: Wed May 15 19:16:49 2013
New Revision: 1483018

URL: http://svn.apache.org/r1483018
Log:
Addresses CTAKES-195. Adds two new models for the parser, one trained on sharp seed (1.5) and one on all sharp (3.1), and changes the default model to 3.1.

Added:
    ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin   (with props)
Modified:
    ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-1.5.bin
    ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
    ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java

Modified: ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-1.5.bin
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-1.5.bin?rev=1483018&r1=1483017&r2=1483018&view=diff
==============================================================================
Binary files - no diff available.

Added: ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin?rev=1483018&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java?rev=1483018&r1=1483017&r2=1483018&view=diff
==============================================================================
--- ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java (original)
+++ ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java Wed May 15 19:16:49 2013
@@ -33,7 +33,7 @@ import org.apache.uima.resource.Resource
 
 public class ConstituencyParser extends JCasAnnotator_ImplBase {
 	public static final String PARAM_MODELFILE = "modelFilename";
-	public static final String defaultModel = "org/apache/ctakes/constituency/parser/models/sharpacq-1.5.bin";
+	public static final String defaultModel = "org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin";
 	
 	ParserWrapper parser = null;
 	Logger logger = Logger.getLogger(this.getClass());

Modified: ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java?rev=1483018&r1=1483017&r2=1483018&view=diff
==============================================================================
--- ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java (original)
+++ ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java Wed May 15 19:16:49 2013
@@ -21,39 +21,64 @@ package org.apache.ctakes.constituency.p
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Collection;
 
-// import opennlp.tools.lang.english.TreebankParser; // no longer part of OpenNLP as of 1.5
+import opennlp.tools.cmdline.parser.ParserTool;
 import opennlp.tools.parser.AbstractBottomUpParser;
 import opennlp.tools.parser.Parse;
-import opennlp.tools.parser.Parser;
-import opennlp.tools.parser.ParserFactory;
 import opennlp.tools.parser.ParserModel;
+import opennlp.tools.parser.chunking.Parser;
 
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.ctakes.constituency.parser.util.TreeUtils;
+import org.apache.ctakes.core.cr.LinesFromFileCollectionReader;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.util.JCasUtil;
+// import opennlp.tools.lang.english.TreebankParser; // no longer part of OpenNLP as of 1.5
 
-import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
-import opennlp.tools.cmdline.parser.ParserTool;
-
-public class ParserEvaluationAnnotator extends JCasAnnotator_ImplBase {
+public class ParserEvaluationAnnotator extends JCasAnnotator_ImplBase{
 
+	public static final String PARAM_PARSERMODEL = "ParserModel";
+	
+	@ConfigurationParameter(
+			name = PARAM_PARSERMODEL,
+			description = "Parser model file to use for parsing",
+			mandatory = false,
+			defaultValue = "org/apache/ctakes/constituency/parser/models/sharpacqwsj.bin"
+	)
+	private File parserModel;
+			
 	Parser parser = null;
-	private boolean useTagDictionary = true;
-	private boolean useCaseSensitiveTagDictionary = true;
-	private String parseStr = "";
 	
 	@Override
 	public void initialize(org.apache.uima.UimaContext aContext) throws org.apache.uima.resource.ResourceInitializationException {
-		String modelFileOrDirname = (String) aContext.getConfigParameterValue("modelDir");
+		super.initialize(aContext);
+//		String modelFileOrDirname = (String) aContext.getConfigParameterValue("modelDir");
 		try {
-			FileInputStream fis = new FileInputStream(new File(modelFileOrDirname));
+//			FileInputStream fis = new FileInputStream(new File(modelFileOrDirname));
+//			File parserFile = FileLocator.locateFile(parserModel);
+			FileInputStream fis = new FileInputStream(parserModel);
 			ParserModel model = new ParserModel(fis);
-			parser = ParserFactory.create(model, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage); //TreebankParser.getParser(modelFileOrDirname, useTagDictionary, useCaseSensitiveTagDictionary, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage);
+//			parser = ParserFactory.create(model, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage); //TreebankParser.getParser(modelFileOrDirname, useTagDictionary, useCaseSensitiveTagDictionary, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage);
+			parser = new Parser(model, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage);
+			fis.close();
 		} catch (IOException e) {
 			e.printStackTrace();
 		}	
 	}
+	
+	
 	@Override
 	public void process(JCas jcas) throws AnalysisEngineProcessException {
 		String sent = jcas.getDocumentText();
@@ -73,4 +98,29 @@ public class ParserEvaluationAnnotator e
 		ttn.setTreebankParse(buff.toString());
 		ttn.addToIndexes();
 	}
+	
+	public static void main(String[] args) throws UIMAException, IOException{
+		if(args.length < 2){
+			System.err.println("Requires 2 arguments: <input file> <output file>");
+			System.exit(-1);
+		}
+		
+		CollectionReader reader = CollectionReaderFactory.createCollectionReader(LinesFromFileCollectionReader.class,
+				LinesFromFileCollectionReader.PARAM_INPUT_FILE_NAME,
+				args[0]);
+		PrintWriter out = new PrintWriter(args[1]);
+		AnalysisEngine ae = AnalysisEngineFactory.createPrimitive(ParserEvaluationAnnotator.class, new Object[]{});
+		
+		JCas jcas = null;
+		JCasIterable casIter = new JCasIterable(reader, ae);
+		while(casIter.hasNext()){
+			jcas = casIter.next();
+			Collection<TopTreebankNode> nodes = JCasUtil.select(jcas, TopTreebankNode.class);
+			for(TopTreebankNode tree : nodes){
+				out.println(tree.getTreebankParse());
+			}
+		}
+		out.close();
+		
+	}
 }