You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/09/18 18:06:58 UTC

svn commit: r1703897 - in /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes: pipelines/BasicPipeline.java pipelines/nicu/BasicAnnotations.java semtype/DictionaryLookupPipeline.java

Author: dligach
Date: Fri Sep 18 16:06:58 2015
New Revision: 1703897

URL: http://svn.apache.org/viewvc?rev=1703897&view=rev
Log:
Some fixes required by the recent fast dictionary lookup updates.

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java?rev=1703897&r1=1703896&r2=1703897&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java Fri Sep 18 16:06:58 2015
@@ -1,7 +1,6 @@
 package org.apache.ctakes.pipelines;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
@@ -19,11 +18,7 @@ import org.apache.ctakes.core.ae.SimpleS
 import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
 import org.apache.ctakes.core.cleartk.ae.SentenceDetectorAnnotator;
 import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
-import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
 import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
-import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
 import org.apache.ctakes.lvg.ae.LvgAnnotator;
 import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.temporal.ae.EventAnnotator;
@@ -41,7 +36,6 @@ import org.apache.uima.fit.component.JCa
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.factory.AggregateBuilder;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.ExternalResourceFactory;
 import org.apache.uima.fit.factory.TypePrioritiesFactory;
 import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
 import org.apache.uima.fit.pipeline.SimplePipeline;
@@ -59,8 +53,8 @@ import com.google.common.io.CharStreams;
 
 public class BasicPipeline {
 
-  public static File inputDirectory = new File("/Users/dima/Boston/Out/Text/");
-  public static String outputDirectory = "/Users/dima/Boston/Out/Xmi/";
+  public static File inputDirectory = new File("/Volumes/chip-nlp/Groups/QualityMetrics/Text/");
+  public static String outputDirectory = "/Volumes/chip-nlp/Groups/QualityMetrics/Xmi";
 
   public static void main(String[] args) throws Exception {
 
@@ -109,53 +103,39 @@ public class BasicPipeline {
     // identify UMLS named entities
 
     // adjust NP in NP NP to span both
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        1));
+    aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+          ChunkAdjuster.class,
+          ChunkAdjuster.PARAM_CHUNK_PATTERN,
+          new String[] { "NP", "NP" },
+          ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+          1 ) );
     // adjust NP in NP PP NP to span all three
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "PP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        2));
+    aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+          ChunkAdjuster.class,
+          ChunkAdjuster.PARAM_CHUNK_PATTERN,
+          new String[] { "NP", "PP", "NP" },
+          ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+          2 ) );
     // add lookup windows for each NP
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyNPChunksToLookupWindowAnnotations.class));
+    aggregateBuilder
+          .add( AnalysisEngineFactory.createEngineDescription( CopyNPChunksToLookupWindowAnnotations.class ) );
     // maximize lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-        OverlapAnnotator.class,
-        "A_ObjectClass",
-        LookupWindowAnnotation.class,
-        "B_ObjectClass",
-        LookupWindowAnnotation.class,
-        "OverlapType",
-        "A_ENV_B",
-        "ActionType",
-        "DELETE",
-        "DeleteAction",
-        new String[] { "selector=B" }));
+    aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+          OverlapAnnotator.class,
+          "A_ObjectClass",
+          LookupWindowAnnotation.class,
+          "B_ObjectClass",
+          LookupWindowAnnotation.class,
+          "OverlapType",
+          "A_ENV_B",
+          "ActionType",
+          "DELETE",
+          "DeleteAction",
+          new String[] { "selector=B" } ) );
     // add UMLS on top of lookup windows
-    try {
-      aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DefaultJCasTermAnnotator.class,
-          AbstractJCasTermAnnotator.PARAM_WINDOW_ANNOT_PRP,
-          "org.apache.ctakes.typesystem.type.textspan.Sentence",
-          JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
-          ExternalResourceFactory.createExternalResourceDescription(
-              FileResourceImpl.class,
-              FileLocator.locateFile("org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml"))
-          ));
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-      throw new ResourceInitializationException(e);
-    }
-
-    aggregateBuilder.add(LvgAnnotator.createAnnotatorDescription());
+    aggregateBuilder.add( DefaultJCasTermAnnotator.createAnnotatorDescription() );
 
-    // add dependency parser
-    aggregateBuilder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
+    aggregateBuilder.add( LvgAnnotator.createAnnotatorDescription() );
     
     // add event annotation 
     // TODO: make sure all required AEs have been added at this point

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java?rev=1703897&r1=1703896&r2=1703897&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java Fri Sep 18 16:06:58 2015
@@ -1,7 +1,6 @@
 package org.apache.ctakes.pipelines.nicu;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
@@ -21,10 +20,8 @@ import org.apache.ctakes.core.ae.Sentenc
 import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
 import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
 import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
 import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
-import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
+import org.apache.ctakes.lvg.ae.LvgAnnotator;
 import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
@@ -40,7 +37,6 @@ import org.apache.uima.fit.component.JCa
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.factory.AggregateBuilder;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.ExternalResourceFactory;
 import org.apache.uima.fit.factory.TypePrioritiesFactory;
 import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
 import org.apache.uima.fit.pipeline.SimplePipeline;
@@ -110,48 +106,39 @@ public class BasicAnnotations {
     // identify UMLS named entities
 
     // adjust NP in NP NP to span both
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        1));
+    aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+          ChunkAdjuster.class,
+          ChunkAdjuster.PARAM_CHUNK_PATTERN,
+          new String[] { "NP", "NP" },
+          ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+          1 ) );
     // adjust NP in NP PP NP to span all three
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "PP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        2));
+    aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+          ChunkAdjuster.class,
+          ChunkAdjuster.PARAM_CHUNK_PATTERN,
+          new String[] { "NP", "PP", "NP" },
+          ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+          2 ) );
     // add lookup windows for each NP
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyNPChunksToLookupWindowAnnotations.class));
+    aggregateBuilder
+          .add( AnalysisEngineFactory.createEngineDescription( CopyNPChunksToLookupWindowAnnotations.class ) );
     // maximize lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-        OverlapAnnotator.class,
-        "A_ObjectClass",
-        LookupWindowAnnotation.class,
-        "B_ObjectClass",
-        LookupWindowAnnotation.class,
-        "OverlapType",
-        "A_ENV_B",
-        "ActionType",
-        "DELETE",
-        "DeleteAction",
-        new String[] { "selector=B" }));
+    aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+          OverlapAnnotator.class,
+          "A_ObjectClass",
+          LookupWindowAnnotation.class,
+          "B_ObjectClass",
+          LookupWindowAnnotation.class,
+          "OverlapType",
+          "A_ENV_B",
+          "ActionType",
+          "DELETE",
+          "DeleteAction",
+          new String[] { "selector=B" } ) );
     // add UMLS on top of lookup windows
-    try {
-      aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DefaultJCasTermAnnotator.class,
-          AbstractJCasTermAnnotator.PARAM_WINDOW_ANNOT_PRP,
-          "org.apache.ctakes.typesystem.type.textspan.Sentence",
-          JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
-          ExternalResourceFactory.createExternalResourceDescription(
-              FileResourceImpl.class,
-              FileLocator.locateFile("org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml"))
-          ));
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-      throw new ResourceInitializationException(e);
-    }
+    aggregateBuilder.add( DefaultJCasTermAnnotator.createAnnotatorDescription() );
+
+    aggregateBuilder.add( LvgAnnotator.createAnnotatorDescription() );
 
     aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
     aggregateBuilder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java?rev=1703897&r1=1703896&r2=1703897&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java Fri Sep 18 16:06:58 2015
@@ -1,7 +1,6 @@
 package org.apache.ctakes.semtype;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
@@ -19,10 +18,8 @@ import org.apache.ctakes.core.ae.Sentenc
 import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
 import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
 import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
 import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
-import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
+import org.apache.ctakes.lvg.ae.LvgAnnotator;
 import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
@@ -38,7 +35,6 @@ import org.apache.uima.fit.component.JCa
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.factory.AggregateBuilder;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.ExternalResourceFactory;
 import org.apache.uima.fit.factory.TypePrioritiesFactory;
 import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
 import org.apache.uima.fit.pipeline.SimplePipeline;
@@ -108,48 +104,39 @@ public class DictionaryLookupPipeline {
     // identify UMLS named entities
 
     // adjust NP in NP NP to span both
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        1));
+    aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+          ChunkAdjuster.class,
+          ChunkAdjuster.PARAM_CHUNK_PATTERN,
+          new String[] { "NP", "NP" },
+          ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+          1 ) );
     // adjust NP in NP PP NP to span all three
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "PP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        2));
+    aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+          ChunkAdjuster.class,
+          ChunkAdjuster.PARAM_CHUNK_PATTERN,
+          new String[] { "NP", "PP", "NP" },
+          ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+          2 ) );
     // add lookup windows for each NP
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyNPChunksToLookupWindowAnnotations.class));
+    aggregateBuilder
+          .add( AnalysisEngineFactory.createEngineDescription( CopyNPChunksToLookupWindowAnnotations.class ) );
     // maximize lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-        OverlapAnnotator.class,
-        "A_ObjectClass",
-        LookupWindowAnnotation.class,
-        "B_ObjectClass",
-        LookupWindowAnnotation.class,
-        "OverlapType",
-        "A_ENV_B",
-        "ActionType",
-        "DELETE",
-        "DeleteAction",
-        new String[] { "selector=B" }));
+    aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+          OverlapAnnotator.class,
+          "A_ObjectClass",
+          LookupWindowAnnotation.class,
+          "B_ObjectClass",
+          LookupWindowAnnotation.class,
+          "OverlapType",
+          "A_ENV_B",
+          "ActionType",
+          "DELETE",
+          "DeleteAction",
+          new String[] { "selector=B" } ) );
     // add UMLS on top of lookup windows
-    try {
-      aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DefaultJCasTermAnnotator.class,
-          AbstractJCasTermAnnotator.PARAM_WINDOW_ANNOT_PRP,
-          "org.apache.ctakes.typesystem.type.textspan.Sentence",
-          JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
-          ExternalResourceFactory.createExternalResourceDescription(
-              FileResourceImpl.class,
-              FileLocator.locateFile("org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml"))
-          ));
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-      throw new ResourceInitializationException(e);
-    }
+    aggregateBuilder.add( DefaultJCasTermAnnotator.createAnnotatorDescription() );
+
+    aggregateBuilder.add( LvgAnnotator.createAnnotatorDescription() );
 
     // write out the CAS after all the above annotations
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(