You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/09/18 18:06:58 UTC
svn commit: r1703897 - in
/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes:
pipelines/BasicPipeline.java pipelines/nicu/BasicAnnotations.java
semtype/DictionaryLookupPipeline.java
Author: dligach
Date: Fri Sep 18 16:06:58 2015
New Revision: 1703897
URL: http://svn.apache.org/viewvc?rev=1703897&view=rev
Log:
Some fixes required by the recent fast dictionary lookup updates.
Modified:
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java
Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java?rev=1703897&r1=1703896&r2=1703897&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/BasicPipeline.java Fri Sep 18 16:06:58 2015
@@ -1,7 +1,6 @@
package org.apache.ctakes.pipelines;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -19,11 +18,7 @@ import org.apache.ctakes.core.ae.SimpleS
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
import org.apache.ctakes.core.cleartk.ae.SentenceDetectorAnnotator;
import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
-import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
-import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
import org.apache.ctakes.lvg.ae.LvgAnnotator;
import org.apache.ctakes.postagger.POSTagger;
import org.apache.ctakes.temporal.ae.EventAnnotator;
@@ -41,7 +36,6 @@ import org.apache.uima.fit.component.JCa
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.factory.TypePrioritiesFactory;
import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
@@ -59,8 +53,8 @@ import com.google.common.io.CharStreams;
public class BasicPipeline {
- public static File inputDirectory = new File("/Users/dima/Boston/Out/Text/");
- public static String outputDirectory = "/Users/dima/Boston/Out/Xmi/";
+ public static File inputDirectory = new File("/Volumes/chip-nlp/Groups/QualityMetrics/Text/");
+ public static String outputDirectory = "/Volumes/chip-nlp/Groups/QualityMetrics/Xmi";
public static void main(String[] args) throws Exception {
@@ -109,53 +103,39 @@ public class BasicPipeline {
// identify UMLS named entities
// adjust NP in NP NP to span both
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- ChunkAdjuster.class,
- ChunkAdjuster.PARAM_CHUNK_PATTERN,
- new String[] { "NP", "NP" },
- ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
- 1));
+ aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 1 ) );
// adjust NP in NP PP NP to span all three
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- ChunkAdjuster.class,
- ChunkAdjuster.PARAM_CHUNK_PATTERN,
- new String[] { "NP", "PP", "NP" },
- ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
- 2));
+ aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "PP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 2 ) );
// add lookup windows for each NP
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyNPChunksToLookupWindowAnnotations.class));
+ aggregateBuilder
+ .add( AnalysisEngineFactory.createEngineDescription( CopyNPChunksToLookupWindowAnnotations.class ) );
// maximize lookup windows
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- OverlapAnnotator.class,
- "A_ObjectClass",
- LookupWindowAnnotation.class,
- "B_ObjectClass",
- LookupWindowAnnotation.class,
- "OverlapType",
- "A_ENV_B",
- "ActionType",
- "DELETE",
- "DeleteAction",
- new String[] { "selector=B" }));
+ aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+ OverlapAnnotator.class,
+ "A_ObjectClass",
+ LookupWindowAnnotation.class,
+ "B_ObjectClass",
+ LookupWindowAnnotation.class,
+ "OverlapType",
+ "A_ENV_B",
+ "ActionType",
+ "DELETE",
+ "DeleteAction",
+ new String[] { "selector=B" } ) );
// add UMLS on top of lookup windows
- try {
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DefaultJCasTermAnnotator.class,
- AbstractJCasTermAnnotator.PARAM_WINDOW_ANNOT_PRP,
- "org.apache.ctakes.typesystem.type.textspan.Sentence",
- JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
- ExternalResourceFactory.createExternalResourceDescription(
- FileResourceImpl.class,
- FileLocator.locateFile("org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml"))
- ));
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- throw new ResourceInitializationException(e);
- }
-
- aggregateBuilder.add(LvgAnnotator.createAnnotatorDescription());
+ aggregateBuilder.add( DefaultJCasTermAnnotator.createAnnotatorDescription() );
- // add dependency parser
- aggregateBuilder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
+ aggregateBuilder.add( LvgAnnotator.createAnnotatorDescription() );
// add event annotation
// TODO: make sure all required AEs have been added at this point
Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java?rev=1703897&r1=1703896&r2=1703897&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/nicu/BasicAnnotations.java Fri Sep 18 16:06:58 2015
@@ -1,7 +1,6 @@
package org.apache.ctakes.pipelines.nicu;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -21,10 +20,8 @@ import org.apache.ctakes.core.ae.Sentenc
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
-import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
+import org.apache.ctakes.lvg.ae.LvgAnnotator;
import org.apache.ctakes.postagger.POSTagger;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.Chunk;
@@ -40,7 +37,6 @@ import org.apache.uima.fit.component.JCa
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.factory.TypePrioritiesFactory;
import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
@@ -110,48 +106,39 @@ public class BasicAnnotations {
// identify UMLS named entities
// adjust NP in NP NP to span both
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- ChunkAdjuster.class,
- ChunkAdjuster.PARAM_CHUNK_PATTERN,
- new String[] { "NP", "NP" },
- ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
- 1));
+ aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 1 ) );
// adjust NP in NP PP NP to span all three
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- ChunkAdjuster.class,
- ChunkAdjuster.PARAM_CHUNK_PATTERN,
- new String[] { "NP", "PP", "NP" },
- ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
- 2));
+ aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "PP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 2 ) );
// add lookup windows for each NP
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyNPChunksToLookupWindowAnnotations.class));
+ aggregateBuilder
+ .add( AnalysisEngineFactory.createEngineDescription( CopyNPChunksToLookupWindowAnnotations.class ) );
// maximize lookup windows
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- OverlapAnnotator.class,
- "A_ObjectClass",
- LookupWindowAnnotation.class,
- "B_ObjectClass",
- LookupWindowAnnotation.class,
- "OverlapType",
- "A_ENV_B",
- "ActionType",
- "DELETE",
- "DeleteAction",
- new String[] { "selector=B" }));
+ aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+ OverlapAnnotator.class,
+ "A_ObjectClass",
+ LookupWindowAnnotation.class,
+ "B_ObjectClass",
+ LookupWindowAnnotation.class,
+ "OverlapType",
+ "A_ENV_B",
+ "ActionType",
+ "DELETE",
+ "DeleteAction",
+ new String[] { "selector=B" } ) );
// add UMLS on top of lookup windows
- try {
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DefaultJCasTermAnnotator.class,
- AbstractJCasTermAnnotator.PARAM_WINDOW_ANNOT_PRP,
- "org.apache.ctakes.typesystem.type.textspan.Sentence",
- JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
- ExternalResourceFactory.createExternalResourceDescription(
- FileResourceImpl.class,
- FileLocator.locateFile("org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml"))
- ));
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- throw new ResourceInitializationException(e);
- }
+ aggregateBuilder.add( DefaultJCasTermAnnotator.createAnnotatorDescription() );
+
+ aggregateBuilder.add( LvgAnnotator.createAnnotatorDescription() );
aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
aggregateBuilder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());
Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java?rev=1703897&r1=1703896&r2=1703897&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/semtype/DictionaryLookupPipeline.java Fri Sep 18 16:06:58 2015
@@ -1,7 +1,6 @@
package org.apache.ctakes.semtype;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -19,10 +18,8 @@ import org.apache.ctakes.core.ae.Sentenc
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
-import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
+import org.apache.ctakes.lvg.ae.LvgAnnotator;
import org.apache.ctakes.postagger.POSTagger;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.Chunk;
@@ -38,7 +35,6 @@ import org.apache.uima.fit.component.JCa
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.factory.TypePrioritiesFactory;
import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
@@ -108,48 +104,39 @@ public class DictionaryLookupPipeline {
// identify UMLS named entities
// adjust NP in NP NP to span both
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- ChunkAdjuster.class,
- ChunkAdjuster.PARAM_CHUNK_PATTERN,
- new String[] { "NP", "NP" },
- ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
- 1));
+ aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 1 ) );
// adjust NP in NP PP NP to span all three
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- ChunkAdjuster.class,
- ChunkAdjuster.PARAM_CHUNK_PATTERN,
- new String[] { "NP", "PP", "NP" },
- ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
- 2));
+ aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "PP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 2 ) );
// add lookup windows for each NP
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyNPChunksToLookupWindowAnnotations.class));
+ aggregateBuilder
+ .add( AnalysisEngineFactory.createEngineDescription( CopyNPChunksToLookupWindowAnnotations.class ) );
// maximize lookup windows
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
- OverlapAnnotator.class,
- "A_ObjectClass",
- LookupWindowAnnotation.class,
- "B_ObjectClass",
- LookupWindowAnnotation.class,
- "OverlapType",
- "A_ENV_B",
- "ActionType",
- "DELETE",
- "DeleteAction",
- new String[] { "selector=B" }));
+ aggregateBuilder.add( AnalysisEngineFactory.createEngineDescription(
+ OverlapAnnotator.class,
+ "A_ObjectClass",
+ LookupWindowAnnotation.class,
+ "B_ObjectClass",
+ LookupWindowAnnotation.class,
+ "OverlapType",
+ "A_ENV_B",
+ "ActionType",
+ "DELETE",
+ "DeleteAction",
+ new String[] { "selector=B" } ) );
// add UMLS on top of lookup windows
- try {
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DefaultJCasTermAnnotator.class,
- AbstractJCasTermAnnotator.PARAM_WINDOW_ANNOT_PRP,
- "org.apache.ctakes.typesystem.type.textspan.Sentence",
- JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
- ExternalResourceFactory.createExternalResourceDescription(
- FileResourceImpl.class,
- FileLocator.locateFile("org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml"))
- ));
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- throw new ResourceInitializationException(e);
- }
+ aggregateBuilder.add( DefaultJCasTermAnnotator.createAnnotatorDescription() );
+
+ aggregateBuilder.add( LvgAnnotator.createAnnotatorDescription() );
// write out the CAS after all the above annotations
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(