You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2013/04/05 18:53:09 UTC
svn commit: r1465043 [2/2] - in /ctakes/trunk: ./
ctakes-assertion/desc/analysis_engine/
ctakes-clinical-pipeline/desc/analysis_engine/
ctakes-clinical-pipeline/desc/analysis_engine/assertion_training_steps/
ctakes-clinical-pipeline/resources/launch/ c...
Copied: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java (from r1464565, ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearParserSemanticRoleLabelerAE.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java?p2=ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java&p1=ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearParserSemanticRoleLabelerAE.java&r1=1464565&r2=1465043&rev=1465043&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearParserSemanticRoleLabelerAE.java (original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java Fri Apr 5 16:53:06 2013
@@ -19,10 +19,14 @@
package org.apache.ctakes.dependency.parser.ae;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.net.MalformedURLException;
+import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -47,28 +51,30 @@ import org.uimafit.descriptor.Configurat
import org.uimafit.descriptor.TypeCapability;
import org.uimafit.util.JCasUtil;
-import clear.dep.DepNode;
-import clear.dep.DepTree;
-import clear.dep.srl.SRLHead;
-import clear.morph.MorphEnAnalyzer;
-import clear.parse.AbstractSRLParser;
-import clear.reader.AbstractReader;
+import com.googlecode.clearnlp.component.AbstractComponent;
+import com.googlecode.clearnlp.dependency.DEPArc;
+import com.googlecode.clearnlp.dependency.DEPFeat;
+import com.googlecode.clearnlp.dependency.DEPLib;
+import com.googlecode.clearnlp.dependency.DEPNode;
+import com.googlecode.clearnlp.dependency.DEPTree;
+import com.googlecode.clearnlp.engine.EngineGetter;
+import com.googlecode.clearnlp.nlp.NLPLib;
+
+import com.googlecode.clearnlp.reader.AbstractReader;
/**
- *This class provides a UIMA wrapper for the ClearParser Semantic Role Labeler, which is
+ *This class provides a UIMA wrapper for the ClearNLP Semantic Role Labeler, which is
* available here.
* <p>
- * http://code.google.com/p/clearparser/
+ * http://code.google.com/p/clearnlp
* <p>
* Before using this AnalysisEngine, you should run a Tokenizer, POS-tagger, Lemmatizer, and the
* CLEAR parser dependency parser.
* <p>
- * Please see /clearparser-wrapper/resources/dependency/clear/README for
+ * Please see /ClearNLP-wrapper/resources/dependency/clear/README for
* important information pertaining to the models provided for this parser.
* <p>
*
- * @author Lee Becker
- *
*/
@TypeCapability(
inputs = {
@@ -78,65 +84,68 @@ import clear.reader.AbstractReader;
"org.apache.ctakes.typesystem.type.syntax.BaseToken:begin",
"org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode"
})
-public class ClearParserSemanticRoleLabelerAE extends JCasAnnotator_ImplBase {
-
+public class ClearNLPSemanticRoleLabelerAE extends JCasAnnotator_ImplBase {
+final String language = AbstractReader.LANG_EN;
public Logger logger = Logger.getLogger(getClass().getName());
-
- public static final String DEFAULT_MODEL_FILE_NAME = "org/apache/ctakes/dependency/parser/models/srl/en_srl_ontonotes.jar";
-
- public static final String ENG_LEMMATIZER_DATA_FILE = "org/apache/ctakes/dependency/parser/models/lemmatizer/wordnet-3.0-lemma-data.jar";
-
-
- public static final String PARAM_PARSER_MODEL_FILE_NAME = "ParserModelFileName";
+ public static final String DEFAULT_SRL_MODEL_FILE_NAME = "org/apache/ctakes/dependency/parser/models/srl/mayo-en-srl-1.3.0.jar";
+ public static final String DEFAULT_PRED_MODEL_FILE_NAME = "org/apache/ctakes/dependency/parser/models/pred/mayo-en-pred-1.3.0.jar";
+ public static final String DEFAULT_ROLE_MODEL_FILE_NAME = "org/apache/ctakes/dependency/parser/models/role/mayo-en-role-1.3.0.jar";
+
+
+ public static final String PARAM_PARSER_MODEL_FILE_NAME = "ParserModelFileName";
+ public static final String PARAM_PRED_MODEL_FILE_NAME = "ParserPredFileName";
+ public static final String PARAM_ROLE_MODEL_FILE_NAME = "ParserRoleFileName";
+
+ protected URI srlModelUri;
+ protected URI srlPredUri;
+ protected URI srlRoleUri;
+
@ConfigurationParameter(
name = PARAM_PARSER_MODEL_FILE_NAME,
- description = "This parameter provides the file name of the semantic role labeler model required by the factory method provided by ClearParserUtil.")
+ description = "This parameter provides the file name of the semantic role labeler model required by the factory method provided by ClearNLPUtil.")
private String parserModelFileName;
public static final String PARAM_LEMMATIZER_DATA_FILE = "LemmatizerDataFile";
+
+ @ConfigurationParameter(
+ name = PARAM_PRED_MODEL_FILE_NAME,
+ description = "This parameter provides the file name of the semantic role labeler model required by the factory method provided by ClearNLPUtil.")
+ private String parserPredFileName;
@ConfigurationParameter(
- name = PARAM_LEMMATIZER_DATA_FILE,
- description = "This parameter provides the data file required for the MorphEnAnalyzer. If not "
- + "specified, this analysis engine will use a default model from the resources directory")
- protected File lemmatizerDataFile;
+ name = PARAM_ROLE_MODEL_FILE_NAME,
+ description = "This parameter provides the file name of the semantic role labeler model required by the factory method provided by ClearNLPUtil.")
+ private String parserRoleFileName;
- public static final String PARAM_USE_LEMMATIZER = "UseLemmatizer";
- @ConfigurationParameter(
- name = PARAM_USE_LEMMATIZER,
- defaultValue = "true",
- description = "If true, use the default ClearParser lemmatizer, otherwise use lemmas from the BaseToken normalizedToken field")
- protected boolean useLemmatizer;
- protected AbstractSRLParser parser;
- protected MorphEnAnalyzer lemmatizer;
+ protected AbstractComponent srlabeler;
+ protected AbstractComponent identifier;
+ protected AbstractComponent classifier;
+
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
super.initialize(context);
-
- logger.info("using Morphy analysis? " + useLemmatizer);
- try {
- if (useLemmatizer) {
- // Note: If lemmatizer data file is not specified, then use lemmas from the BaseToken normalizedToken field.
- // Initialize lemmatizer
- URL lemmatizerDataFileURL = this.lemmatizerDataFile == null
- ? this.getClass().getClassLoader().getResource(ENG_LEMMATIZER_DATA_FILE)
- : this.lemmatizerDataFile.toURI().toURL();
- lemmatizer = new MorphEnAnalyzer(new URL(lemmatizerDataFileURL.toString()));
- }
-
- // Initialize role labeler
- URL parserModelURL = this.parserModelFileName == null
- ? this.getClass().getClassLoader().getResource(DEFAULT_MODEL_FILE_NAME)
- : new File(this.parserModelFileName).toURI().toURL();
- parser = ClearParserUtil.createSRLParser(parserModelURL.openStream());
+ try {
+
+ URL srlPredURL = (this.srlPredUri == null)
+ ? this.getClass().getClassLoader().getResource(DEFAULT_PRED_MODEL_FILE_NAME).toURI().toURL()
+ : this.srlPredUri.toURL();
+ this.identifier = EngineGetter.getComponent(srlPredURL.openStream(), this.language, NLPLib.MODE_PRED);
+
+ URL srlRoleURL = (this.srlRoleUri == null)
+ ? this.getClass().getClassLoader().getResource(DEFAULT_ROLE_MODEL_FILE_NAME).toURI().toURL()
+ : this.srlRoleUri.toURL();
+ this.classifier = EngineGetter.getComponent(srlRoleURL.openStream(), this.language, NLPLib.MODE_ROLE);
+
+ URL srlModelURL = (this.srlModelUri == null)
+ ? this.getClass().getClassLoader().getResource(DEFAULT_SRL_MODEL_FILE_NAME).toURI().toURL()
+ : this.srlModelUri.toURL();
+ this.srlabeler = EngineGetter.getComponent(srlModelURL.openStream(), this.language, NLPLib.MODE_SRL);
- } catch (MalformedURLException e) {
- throw new ResourceInitializationException(e);
- } catch (IOException e) {
+ } catch (Exception e) {
throw new ResourceInitializationException(e);
}
}
@@ -145,10 +154,10 @@ public class ClearParserSemanticRoleLabe
public void process(JCas jCas) throws AnalysisEngineProcessException {
for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
- DepTree tree = new DepTree();
+ DEPTree tree = new DEPTree();
// Build map between CAS dependency node and id for later creation of
- // ClearParser dependency node/tree
+ // ClearNLP dependency node/tree
Map<ConllDependencyNode, Integer> depNodeToID = new HashMap<ConllDependencyNode, Integer>();
int nodeId = 1;
for (ConllDependencyNode depNode : JCasUtil.selectCovered(jCas, ConllDependencyNode.class, sentence)) {
@@ -160,116 +169,109 @@ public class ClearParserSemanticRoleLabe
depNodeToID.put(depNode, nodeId);
nodeId++;
}
- }
+ }
+
+ int[] headIDs = new int[tokens.size()];
+ String[] deprels = new String[tokens.size()];
- // Initialize Token / Sentence info for the ClearParser Semantic Role Labeler
+ // Initialize Token / Sentence info for the ClearNLP Semantic Role Labeler
for (int i = 0; i < tokens.size(); i++) {
BaseToken token = tokens.get(i);
// Determine HeadId
- DepNode node = new DepNode();
ConllDependencyNode casDepNode = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, token).get(0);
- casDepNode.getDeprel();
- String headRelation = casDepNode.getDeprel();
+ deprels[i] = casDepNode.getDeprel();
ConllDependencyNode head = casDepNode.getHead();
-
+
// If there is no head, this is the head node, set node to 0
- int headId = (head == null) ? 0 : depNodeToID.get(head);
+ headIDs[i] = (head == null) ? 0 : depNodeToID.get(head);
// Populate Dependency Node / Tree information
- node.id = i + 1;
- node.form = token.getCoveredText();
- node.pos = token.getPartOfSpeech();
- node.lemma = useLemmatizer ? "" : token.getNormalizedForm();
- node.setHead(headId, headRelation, 0);
+ int id = i + 1;
+ String form = casDepNode.getForm();
+ String pos = casDepNode.getPostag();
+ String lemma = casDepNode.getLemma();
+
+ DEPNode node = new DEPNode(id, form, lemma, pos, new DEPFeat());
tree.add(node);
- }
- tree.setPredicates(AbstractReader.LANG_EN);
-
+ }
+
+ for (int i=1; i<tree.size(); i++)
+ {
+ DEPNode node = tree.get(i);
+ DEPNode head = tree.get(headIDs[i-1]);
+ String label = deprels[i-1];
+ node.setHead(head, label);
+ }
+
+ tree.initSHeads();
+
// Run the SRL
- parser.parse(tree);
+ identifier.process(tree);
+ classifier.process(tree);
+ srlabeler.process(tree);
+
- // Convert ClearParser SRL output to CAS types
+ // Convert ClearNLP SRL output to CAS types
extractSRLInfo(jCas, tokens, tree);
}
-
- /* FIXME DELETEME
- for (Sentence sentence: JCasUtil.select(jCas, Sentence.class)) {
- System.out.println("Sentence: " + sentence.getCoveredText());
- for (Predicate predicate : JCasUtil.selectCovered(jCas, Predicate.class, sentence)) {
- System.out.println("\t" + predicate.getCoveredText() + "/" + "pred");
-
-
- for (SemanticRoleRelation relations : JCasUtil.select(predicate.getRelations(), SemanticRoleRelation.class)) {
- SemanticArgument argument = relations.getArgument();
- System.out.println("\t" + argument.getCoveredText() + "/" + argument.getLabel());
- }
-
- }
- }
- */
-
+
}
/**
- * Converts the output from the ClearParser Semantic Role Labeler to the ClearTK Predicate and
+ * Converts the output from the ClearNLP Semantic Role Labeler to the ClearTK Predicate and
* SemanticArgument Types.
*
* @param jCas
* @param tokens
* - In order list of tokens
* @param tree
- * - DepdendencyTree output by ClearParser SRLPredict
+ * - DependencyTree output by ClearNLP SRLPredict
*/
- private void extractSRLInfo(JCas jCas, List<BaseToken> tokens, DepTree tree) {
- Map<Integer, Predicate> headIdToPredicate = new HashMap<Integer, Predicate>();
+ private void extractSRLInfo(JCas jCas, List<BaseToken> tokens, DEPTree tree) {
+ Map<DEPNode, Predicate> headIdToPredicate = new HashMap<DEPNode, Predicate>();
Map<Predicate, List<SemanticArgument>> predicateArguments = new HashMap<Predicate, List<SemanticArgument>>();
-
+
+ for (int i = 1; i < tree.size(); i++) {
+ // A node with a non-null DEPLib.FEAT_PB feature is treated as a predicate; the feature value is used as its roleset id.
+ DEPNode parserNode = tree.get(i);
+ BaseToken token = tokens.get(i - 1);
+ String rolesetId;
+
+ if ((rolesetId = parserNode.getFeat(DEPLib.FEAT_PB)) != null)
+ {
+ if (!headIdToPredicate.containsKey(parserNode)) {
+ // We have not encountered this predicate yet, so create it
+ Predicate pred = this.createPredicate(jCas, rolesetId, token);
+ headIdToPredicate.put(parserNode, pred);
+ pred.setRelations(new EmptyFSList(jCas));
+ }
+ }
+ }
+
+
// Start at node 1, since node 0 is considered the head of the sentence
for (int i = 1; i < tree.size(); i++) {
- // Every ClearParser parserNode will contain an srlInfo field.
- DepNode parserNode = tree.get(i);
+ // Each semantic-head arc on parserNode attaches this token, as an argument, to the predicate created for the arc's node.
+ DEPNode parserNode = tree.get(i);
BaseToken token = tokens.get(i - 1);
- if (parserNode.srlInfo == null) {
- continue;
- }
-
- if (parserNode.srlInfo.isPredicate()) {
- int headId = i;
- if (!headIdToPredicate.containsKey(headId)) {
- // We have not encountered this predicate yet, so create it
- Predicate pred = this.createPredicate(jCas, parserNode.srlInfo.rolesetId, token);
- headIdToPredicate.put(headId, pred);
- pred.setRelations(new EmptyFSList(jCas));
- }
- } else {
- for (SRLHead head : parserNode.srlInfo.heads) {
- Predicate predicate;
-
- // Determine which predicate this argument belongs to
- if (!headIdToPredicate.containsKey(head.headId)) {
- // The predicate hasn't been encountered, so create it
- BaseToken headToken = tokens.get(head.headId - 1);
- predicate = this.createPredicate(jCas, parserNode.srlInfo.rolesetId, headToken);
- headIdToPredicate.put(head.headId, predicate);
- } else {
- predicate = headIdToPredicate.get(head.headId);
- }
-
- // Append this argument to the predicate's list of arguments
- if (!predicateArguments.containsKey(predicate)) {
- predicateArguments.put(predicate, new ArrayList<SemanticArgument>());
- }
- List<SemanticArgument> argumentList = predicateArguments.get(predicate);
-
- // Create the semantic argument and store for later link creation
- SemanticArgument argument = createArgument(jCas, head, token);
- argumentList.add(argument);
- }
- }
- }
+
+ for (DEPArc head : parserNode.getSHeads()) {
+ Predicate predicate = headIdToPredicate.get(head.getNode());
+
+ // Append this argument to the predicate's list of arguments
+ if (!predicateArguments.containsKey(predicate)) {
+ predicateArguments.put(predicate, new ArrayList<SemanticArgument>());
+ }
+ List<SemanticArgument> argumentList = predicateArguments.get(predicate);
+
+ // Create the semantic argument and store for later link creation
+ SemanticArgument argument = createArgument(jCas, head, token);
+ argumentList.add(argument);
+ }
+ }
// Create relations between predicates and arguments
for (Map.Entry<Predicate, List<SemanticArgument>> entry : predicateArguments.entrySet()) {
@@ -298,9 +300,9 @@ public class ClearParserSemanticRoleLabe
return pred;
}
- private SemanticArgument createArgument(JCas jCas, SRLHead head, BaseToken token) {
+ private SemanticArgument createArgument(JCas jCas, DEPArc head, BaseToken token) {
SemanticArgument argument = new SemanticArgument(jCas, token.getBegin(), token.getEnd());
- argument.setLabel(head.label);
+ argument.setLabel(head.getLabel());
argument.addToIndexes();
return argument;
}
Copied: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/TestClearNLPAnalysisEngines.java (from r1464565, ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/TestClearParserAnalysisEngines.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/TestClearNLPAnalysisEngines.java?p2=ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/TestClearNLPAnalysisEngines.java&p1=ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/TestClearParserAnalysisEngines.java&r1=1464565&r2=1465043&rev=1465043&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/TestClearParserAnalysisEngines.java (original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/TestClearNLPAnalysisEngines.java Fri Apr 5 16:53:06 2013
@@ -38,21 +38,21 @@ import org.uimafit.pipeline.SimplePipeli
import org.uimafit.util.JCasUtil;
import org.xml.sax.SAXException;
-import org.apache.ctakes.dependency.parser.ae.ClearParserDependencyParserAE;
-import org.apache.ctakes.dependency.parser.ae.ClearParserSemanticRoleLabelerAE;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.dependency.parser.util.SRLUtility;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
/**
- * This class illustrates the pipeline needed to run the ClearParser dependency parser and SRL systems
+ * This class illustrates the pipeline needed to run the ClearNLP dependency parser and SRL systems
* Note: This uses small, highly inaccurate model files, to keep the expense of running down.
* For real applications, use the model files recommended in the README.txt file, or leave the model file
* configuration parameter unspecified
* @author lbecker
*
*/
-public class TestClearParserAnalysisEngines {
+public class TestClearNLPAnalysisEngines {
public static String DEP_DUMMY_MODEL_FILE = "src/resources/dependency/dummy.dep.mod.jar";
public static String SRL_DUMMY_MODEL_FILE = "src/resources/srl/dummy.srl.mod.jar";
public static String INPUT_FILE = "../ctakes-clinical-pipeline/test/data/plaintext/testpatient_plaintext_1.txt";
@@ -68,7 +68,7 @@ public class TestClearParserAnalysisEngi
@Option(name = "-s",
aliases = "--srlModelFile",
- usage = "specify the path to the clearparser srl model file",
+ usage = "specify the path to the ClearNLP srl model file",
required = false)
public File srlModelFile = new File(SRL_DUMMY_MODEL_FILE);
@@ -82,11 +82,11 @@ public class TestClearParserAnalysisEngi
/**
- * Simple inner class for dumping out ClearParser output
+ * Simple inner class for dumping out ClearNLP output
* @author lbecker
*
*/
- public static class DumpClearParserOutputAE extends JCasAnnotator_ImplBase {
+ public static class DumpClearNLPOutputAE extends JCasAnnotator_ImplBase {
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
@@ -126,33 +126,33 @@ public class TestClearParserAnalysisEngi
);
// Load preprocessing pipeline (consists of
- AnalysisEngine preprocessingAE = WriteClearParserDescriptors.getPlaintextAggregateBuilder().createAggregate();
+ AnalysisEngine preprocessingAE = WriteClearNLPDescriptors.getPlaintextAggregateBuilder().createAggregate();
// Create dependency parsers analysis engine
- AnalysisEngine clearparserDepParser = AnalysisEngineFactory.createPrimitive(
- ClearParserDependencyParserAE.class,
+ AnalysisEngine ClearNLPDepParser = AnalysisEngineFactory.createPrimitive(
+ ClearNLPDependencyParserAE.class,
typeSystem,
- ClearParserDependencyParserAE.PARAM_PARSER_MODEL_FILE_NAME,
+ ClearNLPDependencyParserAE.PARAM_PARSER_MODEL_FILE_NAME,
depModelFile.toString()
);
// Create analysis engine for SRL
- AnalysisEngine clearparserSRL = AnalysisEngineFactory.createPrimitive(
- ClearParserSemanticRoleLabelerAE.class,
+ AnalysisEngine ClearNLPSRL = AnalysisEngineFactory.createPrimitive(
+ ClearNLPSemanticRoleLabelerAE.class,
typeSystem,
- ClearParserSemanticRoleLabelerAE.PARAM_PARSER_MODEL_FILE_NAME,
+ ClearNLPSemanticRoleLabelerAE.PARAM_PARSER_MODEL_FILE_NAME,
srlModelFile.toString()
);
- AnalysisEngine dumpClearParserOutput = AnalysisEngineFactory.createPrimitive(
- DumpClearParserOutputAE.class,
+ AnalysisEngine dumpClearNLPOutput = AnalysisEngineFactory.createPrimitive(
+ DumpClearNLPOutputAE.class,
typeSystem);
- SimplePipeline.runPipeline(reader1, preprocessingAE, clearparserDepParser, clearparserSRL, dumpClearParserOutput);
+ SimplePipeline.runPipeline(reader1, preprocessingAE, ClearNLPDepParser, ClearNLPSRL, dumpClearNLPOutput);
}
}
Copied: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/WriteClearNLPDescriptors.java (from r1464565, ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/WriteClearParserDescriptors.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/WriteClearNLPDescriptors.java?p2=ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/WriteClearNLPDescriptors.java&p1=ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/WriteClearParserDescriptors.java&r1=1464565&r2=1465043&rev=1465043&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/WriteClearParserDescriptors.java (original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/util/WriteClearNLPDescriptors.java Fri Apr 5 16:53:06 2013
@@ -23,8 +23,8 @@ import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
-import org.apache.ctakes.dependency.parser.ae.ClearParserDependencyParserAE;
-import org.apache.ctakes.dependency.parser.ae.ClearParserSemanticRoleLabelerAE;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -43,17 +43,17 @@ import org.xml.sax.SAXException;
/**
- * Use this to generate UIMA xml description files for the ClearParser analysis engines.
+ * Use this to generate UIMA xml description files for the ClearNLP analysis engines.
*
*/
-public class WriteClearParserDescriptors {
+public class WriteClearNLPDescriptors {
public static final String SIMPLE_SEGMENTER_PATH = "../ctakes-clinical-pipeline/desc/analysis_engine/SimpleSegmentAnnotator.xml";
public static final String SENTENCE_DETECTOR_PATH="../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml";
public static final String TOKENIZER_PATH="../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml";
public static final String LVG_BASE_TOKEN_ANNOTATOR_PATH="desc/analysis_engine/LvgBaseTokenAnnotator.xml";
public static final String POS_TAGGER_PATH="../ctakes-pos-tagger/desc/POSTagger.xml";
- public static final String DEP_NAME="ClearParserDependencyParser";
- public static final String SRL_NAME="ClearParserSRL";
+ public static final String DEP_NAME="ClearNLPDependencyParser";
+ public static final String SRL_NAME="ClearNLPSRL";
public static class Options extends Options_ImplBase {
@Option(name = "-o",
@@ -82,13 +82,13 @@ public class WriteClearParserDescriptors
TypeSystemDescription typeSystem = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("../../../ctakes-type-system/desc/common_type_system.xml");
- AnalysisEngineDescription clearparserSRLDesc = AnalysisEngineFactory.createPrimitiveDescription(
- ClearParserSemanticRoleLabelerAE.class,
+ AnalysisEngineDescription ClearNLPSRLDesc = AnalysisEngineFactory.createPrimitiveDescription(
+ ClearNLPSemanticRoleLabelerAE.class,
typeSystem
);
- AnalysisEngineDescription clearparserDepParserDesc = AnalysisEngineFactory.createPrimitiveDescription(
- ClearParserDependencyParserAE.class,
+ AnalysisEngineDescription ClearNLPDepParserDesc = AnalysisEngineFactory.createPrimitiveDescription(
+ ClearNLPDependencyParserAE.class,
typeSystem
);
@@ -96,25 +96,25 @@ public class WriteClearParserDescriptors
System.out.println((new File("desc/analysis_engine")).getAbsolutePath());
// Write standalone description files
- clearparserDepParserDesc.toXML(new FileOutputStream(new File(options.outputRoot, "ClearParserDependencyParserAE.xml")));
- clearparserSRLDesc.toXML(new FileOutputStream(new File(options.outputRoot, "ClearParserSemanticRoleLabelerAE.xml")));
+ ClearNLPDepParserDesc.toXML(new FileOutputStream(new File(options.outputRoot, "ClearNLPDependencyParserAE.xml")));
+ ClearNLPSRLDesc.toXML(new FileOutputStream(new File(options.outputRoot, "ClearNLPSemanticRoleLabelerAE.xml")));
// Write aggregate plaintext description files
AggregateBuilder aggregateBuilder = getPlaintextAggregateBuilder();
- writeAggregateDescriptions(aggregateBuilder, clearparserDepParserDesc, clearparserSRLDesc, options.outputRoot, "PlaintextAggregate.xml");
+ writeAggregateDescriptions(aggregateBuilder, ClearNLPDepParserDesc, ClearNLPSRLDesc, options.outputRoot, "PlaintextAggregate.xml");
// Write aggregate tokenized description files
aggregateBuilder = getTokenizedAggregateBuilder();
- writeAggregateDescriptions(aggregateBuilder, clearparserDepParserDesc, clearparserSRLDesc, options.outputRoot, "TokenizedAggregate.xml");
+ writeAggregateDescriptions(aggregateBuilder, ClearNLPDepParserDesc, ClearNLPSRLDesc, options.outputRoot, "TokenizedAggregate.xml");
// Write aggregate tokenizedInf description files
aggregateBuilder = getTokenizedInfPosAggregateBuilder();
- writeAggregateDescriptions(aggregateBuilder, clearparserDepParserDesc, clearparserSRLDesc, options.outputRoot, "TokenizedInfPosAggregate.xml");
+ writeAggregateDescriptions(aggregateBuilder, ClearNLPDepParserDesc, ClearNLPSRLDesc, options.outputRoot, "TokenizedInfPosAggregate.xml");
}
/**
- * Builds the plaintext prepreprocessing pipeline for ClearParser
+ * Builds the plaintext preprocessing pipeline for ClearNLP
* @return
* @throws InvalidXMLException
* @throws IOException
@@ -131,7 +131,7 @@ public class WriteClearParserDescriptors
/**
- * Builds the tokenized preprocessing pipeline for ClearParser
+ * Builds the tokenized preprocessing pipeline for ClearNLP
* @return
* @throws InvalidXMLException
* @throws IOException
@@ -144,7 +144,7 @@ public class WriteClearParserDescriptors
}
/**
- * Builds the tokenizedInf preprocessing for ClearParser
+ * Builds the tokenizedInf preprocessing for ClearNLP
* @return
* @throws InvalidXMLException
* @throws IOException
@@ -175,16 +175,16 @@ public class WriteClearParserDescriptors
private static void writeAggregateDescriptions(
AggregateBuilder preprocessing,
- AnalysisEngineDescription clearparserDepParserDesc,
- AnalysisEngineDescription clearparserSRLDesc,
+ AnalysisEngineDescription ClearNLPDepParserDesc,
+ AnalysisEngineDescription ClearNLPSRLDesc,
File outputRoot,
String aggregateSuffix) throws ResourceInitializationException, FileNotFoundException, SAXException, IOException {
// Append Dependency Parser into aggregate and write description file
- preprocessing.add(clearparserDepParserDesc);
+ preprocessing.add(ClearNLPDepParserDesc);
preprocessing.createAggregateDescription().toXML(new FileOutputStream(new File(outputRoot, DEP_NAME + aggregateSuffix)));
// Append SRL Parser into aggregate and write description file
- preprocessing.add(clearparserSRLDesc);
+ preprocessing.add(ClearNLPSRLDesc);
preprocessing.createAggregateDescription().toXML(new FileOutputStream(new File(outputRoot, SRL_NAME + aggregateSuffix)));
}
Modified: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/ClearDependencyUtility.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/ClearDependencyUtility.java?rev=1465043&r1=1465042&r2=1465043&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/ClearDependencyUtility.java (original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/ClearDependencyUtility.java Fri Apr 5 16:53:06 2013
@@ -21,17 +21,15 @@ package org.apache.ctakes.dependency.par
import java.util.ArrayList;
import java.util.List;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
-import clear.dep.DepNode;
-import clear.dep.DepTree;
-import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textsem.Predicate;
-import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import com.googlecode.clearnlp.dependency.DEPNode;
+import com.googlecode.clearnlp.dependency.DEPTree;
/**
* @author m081914
@@ -72,7 +70,7 @@ public class ClearDependencyUtility exte
// }
//
//
- public static ArrayList<ConllDependencyNode> convert(JCas jcas, DepTree clearTree, Sentence sentence, List<BaseToken> tokens) {
+ public static ArrayList<ConllDependencyNode> convert(JCas jcas, DEPTree clearTree, Sentence sentence, List<BaseToken> tokens) {
ArrayList<ConllDependencyNode> uimaNodes = new ArrayList<ConllDependencyNode>(tokens.size()+1);
@@ -93,7 +91,7 @@ public class ClearDependencyUtility exte
for (int i=1; i<clearTree.size(); i++) {
- DepNode clearNode = clearTree.get(i);
+ DEPNode clearNode = clearTree.get(i);
ConllDependencyNode uimaNode = uimaNodes.get(i);
uimaNode.setId(clearNode.id);
@@ -102,8 +100,8 @@ public class ClearDependencyUtility exte
uimaNode.setCpostag(clearNode.pos);
uimaNode.setPostag(clearNode.pos);
uimaNode.setFeats("_");
- uimaNode.setHead(uimaNodes.get(clearNode.headId));
- uimaNode.setDeprel(clearNode.deprel);
+ uimaNode.setHead(uimaNodes.get(clearNode.getHead().id));
+ uimaNode.setDeprel(clearNode.getLabel());
uimaNode.setPhead(null);
uimaNode.setPdeprel("_");
Modified: ctakes/trunk/ctakes-regression-test/desc/analysis_engine/RegressionPipelineAggregateTest.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-regression-test/desc/analysis_engine/RegressionPipelineAggregateTest.xml?rev=1465043&r1=1465042&r2=1465043&view=diff
==============================================================================
--- ctakes/trunk/ctakes-regression-test/desc/analysis_engine/RegressionPipelineAggregateTest.xml (original)
+++ ctakes/trunk/ctakes-regression-test/desc/analysis_engine/RegressionPipelineAggregateTest.xml Fri Apr 5 16:53:06 2013
@@ -70,7 +70,7 @@
</delegateAnalysisEngine>
<!--
<delegateAnalysisEngine key="DependencyParser">
-<import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearParserDependencyParserAE.xml"/>
+<import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPDependencyParserAE.xml"/>
</delegateAnalysisEngine>
-->
</delegateAnalysisEngineSpecifiers>
Modified: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml?rev=1465043&r1=1465042&r2=1465043&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml (original)
+++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml Fri Apr 5 16:53:06 2013
@@ -141,8 +141,8 @@
<delegateAnalysisEngine key="Chunker">
<import location="../../../ctakes-chunker/desc/Chunker.xml"/>
</delegateAnalysisEngine>
- <delegateAnalysisEngine key="ClearParserAE">
- <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearParserDependencyParserAE.xml"/>
+ <delegateAnalysisEngine key="ClearNLPAE">
+ <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPDependencyParserAE.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
<import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
@@ -197,7 +197,7 @@
<node>ContextDependentTokenizerAnnotator</node>
<node>POSTagger</node>
<node>ConstituencyParser</node>
- <node>ClearParserAE</node>
+ <node>ClearNLPAE</node>
<node>Chunker</node>
<node>AdjustNounPhraseToIncludeFollowingNP</node>
<node>AdjustNounPhraseToIncludeFollowingPPNP</node>
Modified: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml?rev=1465043&r1=1465042&r2=1465043&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml (original)
+++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml Fri Apr 5 16:53:06 2013
@@ -36,8 +36,8 @@
<delegateAnalysisEngine key="Chunker">
<import location="../../../ctakes-chunker/desc/Chunker.xml"/>
</delegateAnalysisEngine>
- <delegateAnalysisEngine key="ClearParserAE">
- <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearParserDependencyParserAE.xml"/>
+ <delegateAnalysisEngine key="ClearNLPAE">
+ <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPDependencyParserAE.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
<import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
@@ -92,7 +92,7 @@
<node>ContextDependentTokenizerAnnotator</node>
<node>POSTagger</node>
<node>ConstituencyParser</node>
- <node>ClearParserAE</node>
+ <node>ClearNLPAE</node>
<node>Chunker</node>
<node>AdjustNounPhraseToIncludeFollowingNP</node>
<node>AdjustNounPhraseToIncludeFollowingPPNP</node>
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1465043&r1=1465042&r2=1465043&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Fri Apr 5 16:53:06 2013
@@ -38,8 +38,8 @@ import org.apache.ctakes.core.resource.F
import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
import org.apache.ctakes.core.resource.SuffixMaxentModelResourceImpl;
-import org.apache.ctakes.dependency.parser.ae.ClearParserDependencyParserAE;
-import org.apache.ctakes.dependency.parser.ae.ClearParserSemanticRoleLabelerAE;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
import org.apache.ctakes.lvg.ae.LvgAnnotator;
import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
@@ -371,10 +371,10 @@ public abstract class Evaluation_ImplBas
aggregateBuilder.add(lvgAnnotator);
// add dependency parser
- aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearParserDependencyParserAE.class));
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
// add semantic role labeler
- aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearParserSemanticRoleLabelerAE.class));
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
// write out the CAS after all the above annotations
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
Modified: ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/build/BuildPear.dependency parser.properties
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/build/BuildPear.dependency%20parser.properties?rev=1465043&r1=1465042&r2=1465043&view=diff
==============================================================================
--- ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/build/BuildPear.dependency parser.properties (original)
+++ ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/build/BuildPear.dependency parser.properties Fri Apr 5 16:53:06 2013
@@ -19,4 +19,4 @@
project=dependency parser
classpath=$main_root/bin;$main_root/ctakes-clinical-pipeline/bin;$main_root/../ctakes-context-tokenizer/bin;$main_root/resources;$main_root/../ctakes-chunker/bin;$main_root/../ctakes-chunker/resources;$main_root/../ctakes-lvg/bin;$main_root/../ctakes-lvg/lib/lvg2008dist.jar;$main_root/../ctakes-lvg/resources;$main_root/../ctakes-dictionary-lookup/bin;$main_root/../ctakes-dictionary-lookup/resources;$main_root/../ctakes-ne-contexts/bin;$main_root/../ctakes-core/bin;$main_root/../ctakes-core/lib/log4j-1.2.8.jar;$main_root/../ctakes-core/lib/OpenAI_FSM.jar;$main_root/../ctakes-core/lib/opennlp-tools-1.4.3.jar;$main_root/../ctakes-core/lib/maxent-2.5.0.jar;$main_root/../ctakes-core/lib/trove.jar;$main_root/../ctakes-core/lib/lucene-core-3.0.2.jar;$main_root/../ctakes-core/lib/jdom.jar;$main_root/../ctakes-core/resources;$main_root/../ctakes-pos-tagger/bin;$main_root/../ctakes-pos-tagger/resources;$main_root/../ctakes-preprocessor/bin;$main_root/../ctakes-preprocessor/resources;
-aeDescriptor=desc/analysis_engine/ClearParserPlaintextAggregate.xml
\ No newline at end of file
+aeDescriptor=desc/analysis_engine/ClearNLPPlaintextAggregate.xml
\ No newline at end of file
Modified: ctakes/trunk/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/pom.xml?rev=1465043&r1=1465042&r2=1465043&view=diff
==============================================================================
--- ctakes/trunk/pom.xml (original)
+++ ctakes/trunk/pom.xml Fri Apr 5 16:53:06 2013
@@ -546,6 +546,11 @@
<artifactId>lvg2010dist</artifactId>
<version>0.0.1</version>
</dependency>
+ <dependency>
+ <groupId>com.googlecode.clearnlp</groupId>
+ <artifactId>clearnlp</artifactId>
+ <version>1.3.1</version>
+ </dependency>
</dependencies>
</dependencyManagement>
<build>