You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/07/13 19:50:04 UTC

svn commit: r1690776 - in /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype: BasicPipeline.java ClinicalConceptViewer.java DictionaryLookupPipeline.java SemanticTypePrinter.java

Author: dligach
Date: Mon Jul 13 17:50:03 2015
New Revision: 1690776

URL: http://svn.apache.org/r1690776
Log:
Now iterating over word tokens (looking up the entity/event mentions that cover each word) instead of iterating directly over identified annotations; this is pretty slow.

Added:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java
      - copied, changed from r1690743, ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java
      - copied, changed from r1690743, ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java
Removed:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java

Copied: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java (from r1690743, ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java?p2=ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java&p1=ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java&r1=1690743&r2=1690776&rev=1690776&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/BasicPipeline.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java Mon Jul 13 17:50:03 2015
@@ -23,7 +23,6 @@ import org.apache.ctakes.core.resource.F
 import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
 import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
 import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
-import org.apache.ctakes.lvg.ae.LvgAnnotator;
 import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
@@ -55,7 +54,7 @@ import org.xml.sax.SAXException;
 
 import com.google.common.io.CharStreams;
 
-public class BasicPipeline {
+public class DictionaryLookupPipeline {
 
   public static File inputDirectory = new File("/Users/dima/Boston/Vectors/SemType/Text/");
   public static String outputDirectory = "/Users/Dima/Boston/Out/";
@@ -152,14 +151,6 @@ public class BasicPipeline {
       throw new ResourceInitializationException(e);
     }
 
-    aggregateBuilder.add(LvgAnnotator.createAnnotatorDescription());
-
-//    // add dependency parser
-//    aggregateBuilder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
-//    
-//    // add semantic role labeler
-//    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ClearNLPSemanticRoleLabelerAE.class));
-
     // write out the CAS after all the above annotations
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
         XMIWriter.class,

Copied: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java (from r1690743, ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java?p2=ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java&p1=ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java&r1=1690743&r2=1690776&rev=1690776&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/ClinicalConceptViewer.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/SemanticTypePrinter.java Mon Jul 13 17:50:03 2015
@@ -19,8 +19,11 @@
 package org.apache.ctakes.semtype;
 
 import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.ctakes.pipelines.Utils;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -42,7 +45,7 @@ import com.lexicalscope.jewel.cli.Option
  *  
  * @author dmitriy dligach
  */
-public class ClinicalConceptViewer {
+public class SemanticTypePrinter {
   
   static interface Options {
 
@@ -77,16 +80,23 @@ public class ClinicalConceptViewer {
         throw new AnalysisEngineProcessException(e);
       }
 
-      for(IdentifiedAnnotation mention : JCasUtil.select(systemView, EventMention.class)) {
-        String text = mention.getCoveredText().toLowerCase();
-        String semanticType = mention.getClass().getSimpleName();
-        System.out.format("%s|%s\n", text, semanticType);
-      }
-      
-      for(IdentifiedAnnotation mention : JCasUtil.select(systemView, EntityMention.class)) {
-        String text = mention.getCoveredText().toLowerCase();
-        String semanticType = mention.getClass().getSimpleName();
-        System.out.format("%s|%s\n", text, semanticType);
+      for(WordToken word : JCasUtil.select(systemView, WordToken.class)) {
+        List<EntityMention> entities = JCasUtil.selectCovering(systemView, EntityMention.class, word.getBegin(), word.getEnd());
+        List<EventMention> events = JCasUtil.selectCovering(systemView, EventMention.class, word.getBegin(), word.getEnd());
+        List<IdentifiedAnnotation> entitiesAndEvents = new ArrayList<>();
+        entitiesAndEvents.addAll(entities);
+        entitiesAndEvents.addAll(events);
+        if(entitiesAndEvents.size() == 0) {
+          String text = word.getCoveredText();
+          String semanticType = "None";
+          System.out.format("%s|%s\n", text, semanticType);
+        } else {
+          for(IdentifiedAnnotation mention : entitiesAndEvents) {
+            String text = mention.getCoveredText();
+            String semanticType = mention.getClass().getSimpleName();
+            System.out.format("%s|%s\n", text, semanticType);
+          }
+        }
       }
     }
   }