You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/08/18 23:30:19 UTC
svn commit: r1696507 - in /ctakes/sandbox/ctakes-wsd: pom.xml
src/main/java/org/apache/ctakes/consumers/SentencePrinter.java
src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java
Author: dligach
Date: Tue Aug 18 21:30:19 2015
New Revision: 1696507
URL: http://svn.apache.org/r1696507
Log:
Switched BasicPipeline to Tim's sentence segmenter, which handles line breaks correctly.
Modified:
ctakes/sandbox/ctakes-wsd/pom.xml
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java
Modified: ctakes/sandbox/ctakes-wsd/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/pom.xml?rev=1696507&r1=1696506&r2=1696507&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/pom.xml (original)
+++ ctakes/sandbox/ctakes-wsd/pom.xml Tue Aug 18 21:30:19 2015
@@ -112,7 +112,12 @@
<artifactId>diffutils</artifactId>
<version>1.3.0</version>
</dependency>
- </dependencies>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-sentdetect-cleartk</artifactId>
+ <version>3.2.3-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
<!-- The below is all necessary to unpack the UMLS resources since they
can't be used from the classpath -->
<build>
Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java?rev=1696507&r1=1696506&r2=1696507&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java Tue Aug 18 21:30:19 2015
@@ -68,7 +68,8 @@ public class SentencePrinter {
public void process(JCas jCas) throws AnalysisEngineProcessException {
for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
- System.out.println(sentence.getCoveredText());
+ System.out.println("* " + sentence.getCoveredText());
+ System.out.println();
}
}
}
Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java?rev=1696507&r1=1696506&r2=1696507&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java Tue Aug 18 21:30:19 2015
@@ -15,13 +15,12 @@ import org.apache.ctakes.chunker.ae.Defa
import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
import org.apache.ctakes.core.ae.OverlapAnnotator;
-import org.apache.ctakes.core.ae.SentenceDetector;
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.cleartk.ae.SentenceDetectorAnnotator;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.resource.FileResourceImpl;
import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
-import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
@@ -82,11 +81,9 @@ public class BasicPipeline {
// identify segments
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(SimpleSegmentAnnotator.class));
- // identify sentences
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- SentenceDetector.class,
- SentenceDetector.SD_MODEL_FILE_PARAM,
- "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
+ // identify sentences (use Tim's sentence segmenter that handles line breaks correctly)
+ aggregateBuilder.add(SentenceDetectorAnnotator.getDescription("/org/apache/ctakes/core/sentdetect/model.jar"));
+
// identify tokens
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(TokenizerAnnotatorPTB.class));
// merge some tokens
@@ -159,8 +156,8 @@ public class BasicPipeline {
// add dependency parser
aggregateBuilder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
- // add semantic role labeler
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ClearNLPSemanticRoleLabelerAE.class));
+ // add semantic role labeler (removing for now -- crashes after switched to Tim's sentence segmenter)
+ // aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ClearNLPSemanticRoleLabelerAE.class));
// write out the CAS after all the above annotations
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(