You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/07/13 19:50:04 UTC
svn commit: r1690776 - in
/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype:
BasicPipeline.java ClinicalConceptViewer.java DictionaryLookupPipeline.java
SemanticTypePrinter.java
Author: dligach
Date: Mon Jul 13 17:50:03 2015
New Revision: 1690776
URL: http://svn.apache.org/r1690776
Log:
Now iterating over word tokens instead of identified annotations; each word is looked up against its covering entity and event mentions, which is pretty slow.
Added:
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java
- copied, changed from r1690743, ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java
- copied, changed from r1690743, ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java
Removed:
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java
Copied: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java (from r1690743, ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java?p2=ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java&p1=ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java&r1=1690743&r2=1690776&rev=1690776&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java Mon Jul 13 17:50:03 2015
@@ -23,7 +23,6 @@ import org.apache.ctakes.core.resource.F
import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
-import org.apache.ctakes.lvg.ae.LvgAnnotator;
import org.apache.ctakes.postagger.POSTagger;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.Chunk;
@@ -55,7 +54,7 @@ import org.xml.sax.SAXException;
import com.google.common.io.CharStreams;
-public class BasicPipeline {
+public class DictionaryLookupPipeline {
public static File inputDirectory = new File("/Users/dima/Boston/Vectors/SemType/Text/");
public static String outputDirectory = "/Users/Dima/Boston/Out/";
@@ -152,14 +151,6 @@ public class BasicPipeline {
throw new ResourceInitializationException(e);
}
- aggregateBuilder.add(LvgAnnotator.createAnnotatorDescription());
-
-// // add dependency parser
-// aggregateBuilder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
-//
-// // add semantic role labeler
-// aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ClearNLPSemanticRoleLabelerAE.class));
-
// write out the CAS after all the above annotations
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
XMIWriter.class,
Copied: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java (from r1690743, ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java?p2=ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java&p1=ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java&r1=1690743&r2=1690776&rev=1690776&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java Mon Jul 13 17:50:03 2015
@@ -19,8 +19,11 @@
package org.apache.ctakes.semtype;
import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.ctakes.pipelines.Utils;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -42,7 +45,7 @@ import com.lexicalscope.jewel.cli.Option
*
* @author dmitriy dligach
*/
-public class ClinicalConceptViewer {
+public class SemanticTypePrinter {
static interface Options {
@@ -77,16 +80,23 @@ public class ClinicalConceptViewer {
throw new AnalysisEngineProcessException(e);
}
- for(IdentifiedAnnotation mention : JCasUtil.select(systemView, EventMention.class)) {
- String text = mention.getCoveredText().toLowerCase();
- String semanticType = mention.getClass().getSimpleName();
- System.out.format("%s|%s\n", text, semanticType);
- }
-
- for(IdentifiedAnnotation mention : JCasUtil.select(systemView, EntityMention.class)) {
- String text = mention.getCoveredText().toLowerCase();
- String semanticType = mention.getClass().getSimpleName();
- System.out.format("%s|%s\n", text, semanticType);
+ for(WordToken word : JCasUtil.select(systemView, WordToken.class)) {
+ List<EntityMention> entities = JCasUtil.selectCovering(systemView, EntityMention.class, word.getBegin(), word.getEnd());
+ List<EventMention> events = JCasUtil.selectCovering(systemView, EventMention.class, word.getBegin(), word.getEnd());
+ List<IdentifiedAnnotation> entitiesAndEvents = new ArrayList<>();
+ entitiesAndEvents.addAll(entities);
+ entitiesAndEvents.addAll(events);
+ if(entitiesAndEvents.size() == 0) {
+ String text = word.getCoveredText();
+ String semanticType = "None";
+ System.out.format("%s|%s\n", text, semanticType);
+ } else {
+ for(IdentifiedAnnotation mention : entitiesAndEvents) {
+ String text = mention.getCoveredText();
+ String semanticType = mention.getClass().getSimpleName();
+ System.out.format("%s|%s\n", text, semanticType);
+ }
+ }
}
}
}