You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2015/10/06 19:21:32 UTC
svn commit: r1707092 - in /ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae: ClearNLPDependencyParserAE.java ClearNLPSemanticRoleLabelerAE.java
Author: tmill
Date: Tue Oct 6 17:21:31 2015
New Revision: 1707092
URL: http://svn.apache.org/viewvc?rev=1707092&view=rev
Log:
Fixes CTAKES-383 -- filter each sentence's tokens so that only non-newline tokens are kept.
Modified:
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java
Modified: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java?rev=1707092&r1=1707091&r2=1707092&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java (original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java Tue Oct 6 17:21:31 2015
@@ -25,20 +25,22 @@ import java.util.List;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.dependency.parser.util.ClearDependencyUtility;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
import com.googlecode.clearnlp.component.AbstractComponent;
import com.googlecode.clearnlp.dependency.DEPFeat;
@@ -144,12 +146,17 @@ public class ClearNLPDependencyParserAE
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
- List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
+ List<BaseToken> printableTokens = new ArrayList<>();
+ for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sentence)){
+ if(token instanceof NewlineToken) continue;
+ printableTokens.add(token);
+ }
+
DEPTree tree = new DEPTree();
// Convert CAS data into structures usable by ClearNLP
- for (int i = 0; i < tokens.size(); i++) {
- BaseToken token = tokens.get(i);
+ for (int i = 0; i < printableTokens.size(); i++) {
+ BaseToken token = printableTokens.get(i);
String lemma = useLemmatizer ? lemmatizer.getLemma(token.getCoveredText(), token.getPartOfSpeech()) : token.getNormalizedForm();
DEPNode node = new DEPNode(i+1, token.getCoveredText(), lemma, token.getPartOfSpeech(), new DEPFeat());
tree.add(node);
@@ -157,8 +164,8 @@ public class ClearNLPDependencyParserAE
// Run parser and convert output back to CAS friendly data types
parser.process(tree);
- ArrayList<ConllDependencyNode> nodes = ClearDependencyUtility.convert(jCas, tree, sentence, tokens);
- ClearDependencyUtility.addToIndexes(jCas, nodes);
+ ArrayList<ConllDependencyNode> nodes = ClearDependencyUtility.convert(jCas, tree, sentence, printableTokens);
+ DependencyUtility.addToIndexes(jCas, nodes);
}
Modified: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java?rev=1707092&r1=1707091&r2=1707092&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java (original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java Tue Oct 6 17:21:31 2015
@@ -152,7 +152,11 @@ final String language = AbstractReader.L
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
- List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
+ List<BaseToken> printableTokens = new ArrayList<>();
+ for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sentence)){
+ if(token instanceof NewlineToken) continue;
+ printableTokens.add(token);
+ }
DEPTree tree = new DEPTree();
// Build map between CAS dependency node and id for later creation of
@@ -170,15 +174,15 @@ final String language = AbstractReader.L
}
}
- int[] headIDs = new int[tokens.size()];
- String[] deprels = new String[tokens.size()];
+ int[] headIDs = new int[printableTokens.size()];
+ String[] deprels = new String[printableTokens.size()];
// Initialize Token / Sentence info for the ClearNLP Semantic Role Labeler
// we are filtering out newline tokens
// use idIter as the non-newline token index counter
int idIter = 0;
- for (int i = 0; i < tokens.size(); i++) {
- BaseToken token = tokens.get(i);
+ for (int i = 0; i < printableTokens.size(); i++) {
+ BaseToken token = printableTokens.get(i);
// ignore newline tokens within a sentence - newline = whitespace = non-token
if(!(token instanceof NewlineToken)) {
// Determine HeadId
@@ -222,7 +226,7 @@ final String language = AbstractReader.L
// Convert ClearNLP SRL output to CAS types
- extractSRLInfo(jCas, tokens, tree);
+ extractSRLInfo(jCas, printableTokens, tree);
}