You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/05/15 21:16:54 UTC
svn commit: r1483018 - in /ctakes/trunk:
ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/
ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/
Author: tmill
Date: Wed May 15 19:16:49 2013
New Revision: 1483018
URL: http://svn.apache.org/r1483018
Log:
Addresses ctakes-195. Two new models for the parser: one trained on sharp seed (1.5) and one trained on all sharp (3.1). The default model is changed to 3.1.
Added:
ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin (with props)
Modified:
ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-1.5.bin
ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java
Modified: ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-1.5.bin
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-1.5.bin?rev=1483018&r1=1483017&r2=1483018&view=diff
==============================================================================
Binary files - no diff available.
Added: ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin?rev=1483018&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java?rev=1483018&r1=1483017&r2=1483018&view=diff
==============================================================================
--- ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java (original)
+++ ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java Wed May 15 19:16:49 2013
@@ -33,7 +33,7 @@ import org.apache.uima.resource.Resource
public class ConstituencyParser extends JCasAnnotator_ImplBase {
public static final String PARAM_MODELFILE = "modelFilename";
- public static final String defaultModel = "org/apache/ctakes/constituency/parser/models/sharpacq-1.5.bin";
+ public static final String defaultModel = "org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin";
ParserWrapper parser = null;
Logger logger = Logger.getLogger(this.getClass());
Modified: ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java?rev=1483018&r1=1483017&r2=1483018&view=diff
==============================================================================
--- ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java (original)
+++ ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ParserEvaluationAnnotator.java Wed May 15 19:16:49 2013
@@ -21,39 +21,64 @@ package org.apache.ctakes.constituency.p
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Collection;
-// import opennlp.tools.lang.english.TreebankParser; // no longer part of OpenNLP as of 1.5
+import opennlp.tools.cmdline.parser.ParserTool;
import opennlp.tools.parser.AbstractBottomUpParser;
import opennlp.tools.parser.Parse;
-import opennlp.tools.parser.Parser;
-import opennlp.tools.parser.ParserFactory;
import opennlp.tools.parser.ParserModel;
+import opennlp.tools.parser.chunking.Parser;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.ctakes.constituency.parser.util.TreeUtils;
+import org.apache.ctakes.core.cr.LinesFromFileCollectionReader;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.collection.CollectionReader;
import org.apache.uima.jcas.JCas;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.util.JCasUtil;
+// import opennlp.tools.lang.english.TreebankParser; // no longer part of OpenNLP as of 1.5
-import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
-import opennlp.tools.cmdline.parser.ParserTool;
-
-public class ParserEvaluationAnnotator extends JCasAnnotator_ImplBase {
+public class ParserEvaluationAnnotator extends JCasAnnotator_ImplBase{
+ public static final String PARAM_PARSERMODEL = "ParserModel";
+
+ @ConfigurationParameter(
+ name = PARAM_PARSERMODEL,
+ description = "Parser model file to use for parsing",
+ mandatory = false,
+ defaultValue = "org/apache/ctakes/constituency/parser/models/sharpacqwsj.bin"
+ )
+ private File parserModel;
+
Parser parser = null;
- private boolean useTagDictionary = true;
- private boolean useCaseSensitiveTagDictionary = true;
- private String parseStr = "";
@Override
public void initialize(org.apache.uima.UimaContext aContext) throws org.apache.uima.resource.ResourceInitializationException {
- String modelFileOrDirname = (String) aContext.getConfigParameterValue("modelDir");
+ super.initialize(aContext);
+// String modelFileOrDirname = (String) aContext.getConfigParameterValue("modelDir");
try {
- FileInputStream fis = new FileInputStream(new File(modelFileOrDirname));
+// FileInputStream fis = new FileInputStream(new File(modelFileOrDirname));
+// File parserFile = FileLocator.locateFile(parserModel);
+ FileInputStream fis = new FileInputStream(parserModel);
ParserModel model = new ParserModel(fis);
- parser = ParserFactory.create(model, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage); //TreebankParser.getParser(modelFileOrDirname, useTagDictionary, useCaseSensitiveTagDictionary, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage);
+// parser = ParserFactory.create(model, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage); //TreebankParser.getParser(modelFileOrDirname, useTagDictionary, useCaseSensitiveTagDictionary, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage);
+ parser = new Parser(model, AbstractBottomUpParser.defaultBeamSize, AbstractBottomUpParser.defaultAdvancePercentage);
+ fis.close();
} catch (IOException e) {
e.printStackTrace();
}
}
+
+
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
String sent = jcas.getDocumentText();
@@ -73,4 +98,29 @@ public class ParserEvaluationAnnotator e
ttn.setTreebankParse(buff.toString());
ttn.addToIndexes();
}
+
+ public static void main(String[] args) throws UIMAException, IOException{
+ if(args.length < 2){
+ System.err.println("Requires 2 arguments: <input file> <output file>");
+ System.exit(-1);
+ }
+
+ CollectionReader reader = CollectionReaderFactory.createCollectionReader(LinesFromFileCollectionReader.class,
+ LinesFromFileCollectionReader.PARAM_INPUT_FILE_NAME,
+ args[0]);
+ PrintWriter out = new PrintWriter(args[1]);
+ AnalysisEngine ae = AnalysisEngineFactory.createPrimitive(ParserEvaluationAnnotator.class, new Object[]{});
+
+ JCas jcas = null;
+ JCasIterable casIter = new JCasIterable(reader, ae);
+ while(casIter.hasNext()){
+ jcas = casIter.next();
+ Collection<TopTreebankNode> nodes = JCasUtil.select(jcas, TopTreebankNode.class);
+ for(TopTreebankNode tree : nodes){
+ out.println(tree.getTreebankParse());
+ }
+ }
+ out.close();
+
+ }
}