You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2015/08/23 22:29:54 UTC
svn commit: r1697245 - in /ctakes/trunk/ctakes-dictionary-lookup-fast/src:
main/java/org/apache/ctakes/dictionary/lookup2/ae/
test/java/org/apache/ctakes/dictionary/lookup2/ae/
Author: seanfinan
Date: Sun Aug 23 20:29:53 2015
New Revision: 1697245
URL: http://svn.apache.org/r1697245
Log:
Boy scouting - continued the uimaFIT-ification of dictionary-fast
Modified:
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/AbstractJCasTermAnnotator.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DefaultJCasTermAnnotator.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DictionaryLookupFactory.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/JCasTermAnnotator.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/OverlapJCasTermAnnotator.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/test/java/org/apache/ctakes/dictionary/lookup2/ae/TestDictionaryLoadResources.java
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/AbstractJCasTermAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/AbstractJCasTermAnnotator.java?rev=1697245&r1=1697244&r2=1697245&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/AbstractJCasTermAnnotator.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/AbstractJCasTermAnnotator.java Sun Aug 23 20:29:53 2015
@@ -20,7 +20,6 @@ package org.apache.ctakes.dictionary.loo
import org.apache.ctakes.core.fsm.token.NumberToken;
import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResource;
import org.apache.ctakes.core.util.JCasUtil;
import org.apache.ctakes.dictionary.lookup2.concept.Concept;
import org.apache.ctakes.dictionary.lookup2.concept.ConceptFactory;
@@ -34,7 +33,6 @@ import org.apache.ctakes.dictionary.look
import org.apache.ctakes.typesystem.type.syntax.*;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.cas.text.AnnotationFS;
@@ -43,11 +41,9 @@ import org.apache.uima.fit.descriptor.Co
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.tcas.Annotation;
-import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
-import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.IOException;
import java.io.InputStream;
import java.util.*;
@@ -65,35 +61,28 @@ abstract public class AbstractJCasTermAn
// LOG4J logger based on interface name
final static private Logger LOGGER = Logger.getLogger( "AbstractJCasTermAnnotator" );
- /**
- * specifies the type of window to use for lookup
- */
- public static final String PARAM_WINDOW_ANNOT_PRP = "windowAnnotations";
- /**
- * optional part of speech tags for tokens that should not be used for lookup
- */
- public static final String PARAM_EXC_TAGS_PRP = "exclusionTags";
- /**
- * optional minimum span for tokens that should not be used for lookup
- */
- public static final String PARAM_MIN_SPAN_PRP = "minimumSpan";
-
-
- static private final String DEFAULT_LOOKUP_WINDOW = "org.apache.ctakes.typesystem.type.textspan.Sentence";
- static private final String DEFAULT_EXCLUSION_TAGS
- = "VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,IN,LS,MD,PDT,POS,PP,PP$,PRP,PRP$,RP,TO,WDT,WP,WPS,WRB";
-
+ private int _lookupWindowType;
private DictionarySpec _dictionarySpec;
-
- @ConfigurationParameter(name = JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY, mandatory = false, description = "Path to Dictionary spec xml")
- protected String descriptorFilePath;
+ private final Set<String> _exclusionPartsOfSpeech = new HashSet<>();
+
+ @ConfigurationParameter( name = JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY, mandatory = false,
+ description = "Path to Dictionary spec xml", defaultValue = DEFAULT_DICT_DESC_PATH )
+ private String _descriptorFilePath;
// type of lookup window to use, typically "LookupWindowAnnotation" or "Sentence"
- private int _lookupWindowType;
+ @ConfigurationParameter( name = JCasTermAnnotator.PARAM_WINDOW_ANNOT_KEY, mandatory = false,
+ description = "Type of Lookup window to use", defaultValue = DEFAULT_LOOKUP_WINDOW )
+ private String _windowClassName;
+
// set of exclusion POS tags (lower cased), may be null
- private final Set<String> _exclusionPartsOfSpeech = new HashSet<>();
- // minimum span required to use token for lookup
- protected int _minimumLookupSpan = 3;
+ @ConfigurationParameter( name = JCasTermAnnotator.PARAM_EXC_TAGS_KEY, mandatory = false,
+ description = "Set of exclusion POS tags", defaultValue = DEFAULT_EXCLUSION_TAGS )
+ private String _exclusionPosTags;
+
+ // minimum span required to accept a term
+ @ConfigurationParameter( name = JCasTermAnnotator.PARAM_MIN_SPAN_KEY, mandatory = false,
+ description = "Minimum number of characters for a term" )
+ protected int _minimumLookupSpan = DEFAULT_MINIMUM_SPAN;
/**
* {@inheritDoc}
@@ -101,42 +90,32 @@ abstract public class AbstractJCasTermAn
@Override
public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException {
super.initialize( uimaContext );
- try {
- String windowClassName = (String)uimaContext.getConfigParameterValue( PARAM_WINDOW_ANNOT_PRP );
- if ( windowClassName == null || windowClassName.isEmpty() ) {
- windowClassName = DEFAULT_LOOKUP_WINDOW;
- }
- LOGGER.info( "Using dictionary lookup window type: " + windowClassName );
- _lookupWindowType = JCasUtil.getType( windowClassName );
- // optional exclusion POS tags
- String exclusionTags = (String)uimaContext.getConfigParameterValue( PARAM_EXC_TAGS_PRP );
- if ( exclusionTags == null ) {
- // Notice that exclusion tags can be set to empty on purpose
- exclusionTags = DEFAULT_EXCLUSION_TAGS;
- }
- final String[] tagArr = exclusionTags.split( "," );
- for ( String tag : tagArr ) {
- _exclusionPartsOfSpeech.add( tag.toUpperCase() );
- }
- final List<String> posList = new ArrayList<>( _exclusionPartsOfSpeech );
- Collections.sort( posList );
- final StringBuilder sb = new StringBuilder();
- for ( String pos : posList ) {
- sb.append( pos ).append( " " );
- }
- LOGGER.info( "Exclusion tagset loaded: " + sb.toString() );
+ LOGGER.info( "Using dictionary lookup window type: " + _windowClassName );
+ _lookupWindowType = JCasUtil.getType( _windowClassName );
+ final String[] tagArr = _exclusionPosTags.split( "," );
+ for ( String tag : tagArr ) {
+ _exclusionPartsOfSpeech.add( tag.toUpperCase() );
+ }
+ final List<String> posList = new ArrayList<>( _exclusionPartsOfSpeech );
+ Collections.sort( posList );
+ final StringBuilder sb = new StringBuilder();
+ for ( String pos : posList ) {
+ sb.append( pos ).append( " " );
+ }
+ LOGGER.info( "Exclusion tagset loaded: " + sb.toString() );
- // optional minimum span, default is 3
- final Object minimumSpan = uimaContext.getConfigParameterValue( PARAM_MIN_SPAN_PRP );
- if ( minimumSpan != null ) {
- _minimumLookupSpan = parseInt( minimumSpan, PARAM_MIN_SPAN_PRP, _minimumLookupSpan );
- }
- LOGGER.info( "Using minimum term text span: " + _minimumLookupSpan );
- InputStream descriptor = FileLocator.getAsStream(descriptorFilePath);
- _dictionarySpec = DictionaryDescriptorParser.parseDescriptor( descriptor, uimaContext );
- } catch ( FileNotFoundException | AnnotatorContextException multE ) {
+ // optional minimum span, default is 3
+ final Object minimumSpan = uimaContext.getConfigParameterValue( PARAM_MIN_SPAN_KEY );
+ if ( minimumSpan != null ) {
+ _minimumLookupSpan = parseInt( minimumSpan, PARAM_MIN_SPAN_KEY, _minimumLookupSpan );
+ }
+ LOGGER.info( "Using minimum term text span: " + _minimumLookupSpan );
+ LOGGER.info( "Using Dictionary Descriptor: " + _descriptorFilePath );
+ try ( InputStream descriptorStream = FileLocator.getAsStream( _descriptorFilePath ) ) {
+ _dictionarySpec = DictionaryDescriptorParser.parseDescriptor( descriptorStream, uimaContext );
+ } catch ( IOException | AnnotatorContextException multE ) {
throw new ResourceInitializationException( multE );
- }
+ }
}
@@ -201,6 +180,7 @@ abstract public class AbstractJCasTermAn
/**
* Skip windows that are section headers/footers. Kludge, but worth doing
+ * todo read these string values as parameters from uimaContext
* {@inheritDoc}
*/
@Override
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DefaultJCasTermAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DefaultJCasTermAnnotator.java?rev=1697245&r1=1697244&r2=1697245&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DefaultJCasTermAnnotator.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DefaultJCasTermAnnotator.java Sun Aug 23 20:29:53 2015
@@ -25,6 +25,9 @@ import org.apache.ctakes.dictionary.look
import org.apache.ctakes.dictionary.lookup2.util.FastLookupToken;
import org.apache.ctakes.dictionary.lookup2.util.TokenMatchUtil;
import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.resource.ResourceInitializationException;
import java.util.Collection;
import java.util.List;
@@ -76,4 +79,14 @@ final public class DefaultJCasTermAnnota
}
}
+ static public AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( DefaultJCasTermAnnotator.class );
+ }
+
+ static public AnalysisEngineDescription createAnnotatorDescription( final String descriptorPath )
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( DefaultJCasTermAnnotator.class,
+ DICTIONARY_DESCRIPTOR_KEY, descriptorPath );
+ }
+
}
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DictionaryLookupFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DictionaryLookupFactory.java?rev=1697245&r1=1697244&r2=1697245&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DictionaryLookupFactory.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DictionaryLookupFactory.java Sun Aug 23 20:29:53 2015
@@ -5,8 +5,8 @@ import org.apache.uima.analysis_engine.A
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.resource.ResourceInitializationException;
-import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
/**
@@ -19,21 +19,52 @@ final public class DictionaryLookupFacto
private DictionaryLookupFactory() {
}
- public static AnalysisEngineDescription createUmlsDictionaryLookupDescription()
+ /**
+ * @return a description for a DefaultJCasTermAnnotator that uses the default parameter values
+ * @throws ResourceInitializationException
+ */
+ public static AnalysisEngineDescription createDefaultDictionaryLookupDescription()
throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( DefaultJCasTermAnnotator.class );
+ }
+
+ public static AnalysisEngineDescription createCustomDictionaryLookupDescription( final String descriptorPath )
+ throws ResourceInitializationException {
+ checkDescriptorPath( descriptorPath );
return AnalysisEngineFactory.createEngineDescription( DefaultJCasTermAnnotator.class,
JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
- "org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml" );
+ descriptorPath );
}
- public static AnalysisEngineDescription createCustomDictionaryLookupDescription( final String dictionaryDescriptor )
- throws ResourceInitializationException, FileNotFoundException {
- if ( FileLocator.getAsStream(dictionaryDescriptor) == null ) {
- throw new ResourceInitializationException( new FileNotFoundException( dictionaryDescriptor ) );
- }
- return AnalysisEngineFactory.createEngineDescription( DefaultJCasTermAnnotator.class,
+ public static AnalysisEngineDescription createOverlapDictionaryLookupDescription()
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( OverlapJCasTermAnnotator.class );
+ }
+
+ public static AnalysisEngineDescription createCustomOverlapDictionaryLookupDescription( final String descriptorPath )
+ throws ResourceInitializationException {
+ checkDescriptorPath( descriptorPath );
+ return AnalysisEngineFactory.createEngineDescription( OverlapJCasTermAnnotator.class,
JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
- dictionaryDescriptor );
+ descriptorPath );
+ }
+
+ /**
+ * Ensures that the given descriptor path can be opened as a stream; does not ensure that the path points to a file with a valid format
+ *
+ * @param descriptorPath path to the dictionary descriptor (spec) xml
+ * @throws ResourceInitializationException if an input stream cannot be opened using the given path
+ */
+ static private void checkDescriptorPath( final String descriptorPath ) throws ResourceInitializationException {
+ // At this time, FileLocator.getAsStream() cannot return null, but this may help in the future
+ try ( InputStream descriptorStream = FileLocator.getAsStream( descriptorPath ) ) {
+ if ( descriptorStream == null ) {
+ throw new ResourceInitializationException( new IOException( "Cannot open "
+ + descriptorPath + " as stream" ) );
+ }
+ } catch ( IOException ioE ) {
+ throw new ResourceInitializationException( ioE );
+ }
}
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/JCasTermAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/JCasTermAnnotator.java?rev=1697245&r1=1697244&r2=1697245&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/JCasTermAnnotator.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/JCasTermAnnotator.java Sun Aug 23 20:29:53 2015
@@ -37,7 +37,26 @@ import java.util.List;
*/
public interface JCasTermAnnotator extends AnalysisComponent {
- static public final String DICTIONARY_DESCRIPTOR_KEY = "DictionaryDescriptor";
+ static final String DICTIONARY_DESCRIPTOR_KEY = "DictionaryDescriptor";
+ /**
+ * specifies the type of window to use for lookup
+ */
+ static final String PARAM_WINDOW_ANNOT_KEY = "windowAnnotations";
+ /**
+ * optional part of speech tags for tokens that should not be used for lookup
+ */
+ static final String PARAM_EXC_TAGS_KEY = "exclusionTags";
+ /**
+ * optional minimum span (number of characters) required for a term to be accepted by the lookup
+ */
+ static final String PARAM_MIN_SPAN_KEY = "minimumSpan";
+
+
+ static final String DEFAULT_LOOKUP_WINDOW = "org.apache.ctakes.typesystem.type.textspan.Sentence";
+ static final String DEFAULT_EXCLUSION_TAGS
+ = "VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,IN,LS,MD,PDT,POS,PP,PP$,PRP,PRP$,RP,TO,WDT,WP,WPS,WRB";
+ static final int DEFAULT_MINIMUM_SPAN = 3;
+ static final String DEFAULT_DICT_DESC_PATH = "org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml";
/**
* @return Dictionaries that are used by this WindowProcessor
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/OverlapJCasTermAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/OverlapJCasTermAnnotator.java?rev=1697245&r1=1697244&r2=1697245&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/OverlapJCasTermAnnotator.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/OverlapJCasTermAnnotator.java Sun Aug 23 20:29:53 2015
@@ -27,6 +27,9 @@ import org.apache.ctakes.dictionary.look
import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.resource.ResourceInitializationException;
import java.util.ArrayList;
@@ -45,9 +48,6 @@ final public class OverlapJCasTermAnnota
// LOG4J logger based on interface name
final private Logger _logger = Logger.getLogger( "OverlapJCasTermAnnotator" );
- private int _consecutiveSkipMax = 2;
- private int _totalSkipMax = 4;
-
/**
* specifies the number of consecutive non-comma tokens that can be skipped
*/
@@ -58,6 +58,15 @@ final public class OverlapJCasTermAnnota
static private final String TOTAL_SKIP_PRP_KEY = "totalTokenSkips";
+ @ConfigurationParameter( name = CONS_SKIP_PRP_KEY, mandatory = false,
+ description = "Number of consecutive non-comma tokens that can be skipped" )
+ private int _consecutiveSkipMax = 2;
+
+ @ConfigurationParameter( name = TOTAL_SKIP_PRP_KEY, mandatory = false,
+ description = "Number of total tokens that can be skipped" )
+ private int _totalSkipMax = 4;
+
+
/**
* Set the number of consecutive and total tokens that can be skipped (optional). Defaults are 2 and 4.
* {@inheritDoc}
@@ -65,14 +74,6 @@ final public class OverlapJCasTermAnnota
@Override
public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException {
super.initialize( uimaContext );
- final Object consecutiveSkipText = uimaContext.getConfigParameterValue( CONS_SKIP_PRP_KEY );
- if ( consecutiveSkipText != null ) {
- _consecutiveSkipMax = parseInt( consecutiveSkipText, CONS_SKIP_PRP_KEY, _consecutiveSkipMax );
- }
- final Object totalSkipText = uimaContext.getConfigParameterValue( TOTAL_SKIP_PRP_KEY );
- if ( totalSkipText != null ) {
- _totalSkipMax = parseInt( totalSkipText, TOTAL_SKIP_PRP_KEY, _consecutiveSkipMax );
- }
_logger.info( "Maximum consecutive tokens that can be skipped: " + _consecutiveSkipMax );
_logger.info( "Maximum tokens that can be skipped: " + _totalSkipMax );
}
@@ -222,4 +223,24 @@ final public class OverlapJCasTermAnnota
return tokens;
}
+ static public AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( OverlapJCasTermAnnotator.class );
+ }
+
+ static public AnalysisEngineDescription createAnnotatorDescription( final String descriptorPath )
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( OverlapJCasTermAnnotator.class,
+ DICTIONARY_DESCRIPTOR_KEY, descriptorPath );
+ }
+
+ static public AnalysisEngineDescription createAnnotatorDescription( final String descriptorPath,
+ final int consecutiveSkipMax,
+ final int totalSkipMax )
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( OverlapJCasTermAnnotator.class,
+ DICTIONARY_DESCRIPTOR_KEY, descriptorPath,
+ CONS_SKIP_PRP_KEY, consecutiveSkipMax,
+ TOTAL_SKIP_PRP_KEY, totalSkipMax );
+ }
+
}
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/test/java/org/apache/ctakes/dictionary/lookup2/ae/TestDictionaryLoadResources.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/test/java/org/apache/ctakes/dictionary/lookup2/ae/TestDictionaryLoadResources.java?rev=1697245&r1=1697244&r2=1697245&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/test/java/org/apache/ctakes/dictionary/lookup2/ae/TestDictionaryLoadResources.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/test/java/org/apache/ctakes/dictionary/lookup2/ae/TestDictionaryLoadResources.java Sun Aug 23 20:29:53 2015
@@ -1,24 +1,91 @@
package org.apache.ctakes.dictionary.lookup2.ae;
+import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.factory.JCasFactory;
-import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.resource.ResourceInitializationException;
import org.junit.Test;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+
public class TestDictionaryLoadResources {
- @Test
- public void test() throws Exception {
- TypeSystemDescription typeSystem = TypeSystemDescriptionFactory.createTypeSystemDescription();
- JCas jcas = JCasFactory.createJCas();
- jcas.setDocumentText("The quick red fox jumped over cTAKES. Allie had a little lamb; little lamb.");
- //Test had to use custom test config otherwise we'll have to save our umls credentials.
- AnalysisEngineDescription aed = DictionaryLookupFactory
- .createCustomDictionaryLookupDescription("org/apache/ctakes/dictionary/lookup/fast/TestcTakesHsql.xml");
- SimplePipeline.runPipeline(jcas,aed);
- }
+
+ static private final String TEST_TEXT
+ = "The quick red fox jumped over cTAKES. Allie had a little lamb; little lamb.";
+
+ static private final String TEST_CUSTOM_DESC = "org/apache/ctakes/dictionary/lookup/fast/TestcTakesHsql.xml";
+
+ @Test
+ public void testDefaultLookupDescription() {
+ final JCas jcas = createTestJCas();
+ try {
+ //Test had to use custom test config otherwise we'll have to save our umls credentials.
+ final AnalysisEngineDescription aed
+ = DictionaryLookupFactory.createDefaultDictionaryLookupDescription();
+ SimplePipeline.runPipeline( jcas, aed );
+ } catch ( AnalysisEngineProcessException | ResourceInitializationException multE ) {
+ // Since this is a Test, use a fail instead of throwing an exception
+ fail( multE.getMessage() );
+ }
+ }
+
+ @Test
+ public void testCustomLookupDescription() {
+ final JCas jcas = createTestJCas();
+ try {
+ //Test had to use custom test config otherwise we'll have to save our umls credentials.
+ final AnalysisEngineDescription aed
+ = DictionaryLookupFactory.createCustomDictionaryLookupDescription( TEST_CUSTOM_DESC );
+ SimplePipeline.runPipeline( jcas, aed );
+ } catch ( AnalysisEngineProcessException | ResourceInitializationException multE ) {
+ fail( multE.getMessage() );
+ }
+ }
+
+ @Test
+ public void testOverlapLookupDescription() {
+ final JCas jcas = createTestJCas();
+ try {
+ //Test had to use custom test config otherwise we'll have to save our umls credentials.
+ final AnalysisEngineDescription aed
+ = DictionaryLookupFactory.createOverlapDictionaryLookupDescription();
+ SimplePipeline.runPipeline( jcas, aed );
+ } catch ( AnalysisEngineProcessException | ResourceInitializationException multE ) {
+ fail( multE.getMessage() );
+ }
+ }
+
+ @Test
+ public void testCustomOverlapLookupDescription() {
+ final JCas jcas = createTestJCas();
+ try {
+ //Test had to use custom test config otherwise we'll have to save our umls credentials.
+ final AnalysisEngineDescription aed
+ = DictionaryLookupFactory.createCustomOverlapDictionaryLookupDescription( TEST_CUSTOM_DESC );
+ SimplePipeline.runPipeline( jcas, aed );
+ } catch ( AnalysisEngineProcessException | ResourceInitializationException multE ) {
+ fail( multE.getMessage() );
+ }
+ }
+
+
+ static private JCas createTestJCas() {
+// TypeSystemDescription typeSystem = TypeSystemDescriptionFactory.createTypeSystemDescription();
+ JCas jcas = null;
+ try {
+ jcas = JCasFactory.createJCas();
+ jcas.setDocumentText( TEST_TEXT );
+ } catch ( UIMAException uimaE ) {
+ fail( uimaE.getMessage() );
+ }
+ assertNotNull( "JCas could not be created", jcas );
+ return jcas;
+ }
}