You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2017/11/26 15:56:24 UTC
svn commit: r1816385 [1/2] - in /ctakes/trunk:
ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/
ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/
ctakes-chunker/src/main/java/org/apache/ctakes/chunke...
Author: seanfinan
Date: Sun Nov 26 15:56:23 2017
New Revision: 1816385
URL: http://svn.apache.org/viewvc?rev=1816385&view=rev
Log:
CTAKES-485 : Add thread safe default clinical pipeline
Added:
ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/TsAttributeCleartkSubPipe.piper
- copied, changed from r1816235, ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/AttributeCleartkSubPipe.piper
ctakes/trunk/ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/TsChunkerSubPipe.piper
- copied, changed from r1815683, ctakes/trunk/ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/ChunkerSubPipe.piper
ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/concurrent/
ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/concurrent/ThreadSafeChunker.java
ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/TsDefaultFastPipeline.piper
- copied, changed from r1816235, ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper
ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/concurrent/
ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/concurrent/ThreadSafeConstituencyParser.java
ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsDefaultTokenizerPipeline.piper
- copied, changed from r1816235, ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/DefaultTokenizerPipeline.piper
ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsFullTokenizerPipeline.piper
- copied, changed from r1815683, ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/AdvancedTokenizerPipeline.piper
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetector.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetectorBio.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeWrapper.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipeBitLocator.java
ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/pipeline/TsCorefSubPipe.piper
- copied, changed from r1815683, ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/pipeline/CorefSubPipe.piper
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/concurrent/
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/concurrent/ThreadSafeMarkableSalienceAnnotator.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/concurrent/ThreadSafeMentionClusterCoreferencer.java
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/concurrent/
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/concurrent/ThreadSafeClearNlpDepParser.java
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/concurrent/ThreadSafeClearNlpSemRoleLabeler.java
ctakes/trunk/ctakes-dictionary-lookup-fast-res/src/main/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/TsDictionarySubPipe.piper
- copied, changed from r1815683, ctakes/trunk/ctakes-dictionary-lookup-fast-res/src/main/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/DictionarySubPipe.piper
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concurrent/
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concurrent/ThreadSafeFastLookup.java
ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/FullPipeline.piper
ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/ThreadSafeFullPipeline.piper
ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/concurrent/
ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/concurrent/ThreadSafePosTagger.java
ctakes/trunk/ctakes-relation-extractor-res/src/main/resources/org/apache/ctakes/relationextractor/pipeline/TsRelationSubPipe.piper
- copied, changed from r1815683, ctakes/trunk/ctakes-relation-extractor-res/src/main/resources/org/apache/ctakes/relationextractor/pipeline/RelationSubPipe.piper
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/concurrent/
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/concurrent/ThreadSafeDegreeExtractor.java
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/concurrent/ThreadSafeLocationExtractor.java
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/concurrent/ThreadSafeModifierExtractor.java
ctakes/trunk/ctakes-temporal-res/src/main/resources/org/apache/ctakes/temporal/pipeline/TsTemporalSubPipe.piper
- copied, changed from r1815683, ctakes/trunk/ctakes-temporal-res/src/main/resources/org/apache/ctakes/temporal/pipeline/TemporalSubPipe.piper
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/concurrent/
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/concurrent/ThreadSafeBackTimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/concurrent/ThreadSafeDocTimeRelAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/concurrent/ThreadSafeEventAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/concurrent/ThreadSafeEventEventRelAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/concurrent/ThreadSafeEventTimeRelAnnotator.java
Modified:
ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper
ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/AdvancedTokenizerPipeline.piper
ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv
ctakes/trunk/ctakes-core/pom.xml
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/RegexSectionizer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/html/HtmlTextWriter.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DefaultJCasTermAnnotator.java
ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldCui.piper
ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/ProcessDir.piper
Copied: ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/TsAttributeCleartkSubPipe.piper (from r1816235, ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/AttributeCleartkSubPipe.piper)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/TsAttributeCleartkSubPipe.piper?p2=ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/TsAttributeCleartkSubPipe.piper&p1=ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/AttributeCleartkSubPipe.piper&r1=1816235&r2=1816385&rev=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/AttributeCleartkSubPipe.piper (original)
+++ ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/pipeline/TsAttributeCleartkSubPipe.piper Sun Nov 26 15:56:23 2017
@@ -1,9 +1,9 @@
// Commands and parameters to create a default entity attributes processing sub-pipeline. This is not a full pipeline.
// Add the Dependency parser for use by cleartk
-addDescription ClearNLPDependencyParserAE
+addDescription concurrent.ThreadSafeClearNlpDepParser
// Add the Semantic Role Labeler parser for use by cleartk
-addLogged ClearNLPSemanticRoleLabelerAE
+addDescription concurrent.ThreadSafeClearNlpSemRoleLabeler
// Add the cleartk package for cleartk class lookups
package org.apache.ctakes.assertion.medfacts.cleartk
Copied: ctakes/trunk/ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/TsChunkerSubPipe.piper (from r1815683, ctakes/trunk/ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/ChunkerSubPipe.piper)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/TsChunkerSubPipe.piper?p2=ctakes/trunk/ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/TsChunkerSubPipe.piper&p1=ctakes/trunk/ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/ChunkerSubPipe.piper&r1=1815683&r2=1816385&rev=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/ChunkerSubPipe.piper (original)
+++ ctakes/trunk/ctakes-chunker-res/src/main/resources/org/apache/ctakes/chunker/pipeline/TsChunkerSubPipe.piper Sun Nov 26 15:56:23 2017
@@ -1,5 +1,5 @@
// Commands and parameters to create a default chunker processing sub-pipeline. This is not a full pipeline.
-add Chunker
+add concurrent.ThreadSafeChunker
addDescription adjuster.ChunkAdjuster NP,NP 1
addDescription adjuster.ChunkAdjuster NP,PP,NP 2
Added: ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/concurrent/ThreadSafeChunker.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/concurrent/ThreadSafeChunker.java?rev=1816385&view=auto
==============================================================================
--- ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/concurrent/ThreadSafeChunker.java (added)
+++ ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/concurrent/ThreadSafeChunker.java Sun Nov 26 15:56:23 2017
@@ -0,0 +1,106 @@
+package org.apache.ctakes.chunker.concurrent;
+
+import org.apache.ctakes.chunker.ae.Chunker;
+import org.apache.ctakes.core.concurrent.ThreadSafeWrapper;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/**
+ * Chunker whose initialize and process calls are forwarded to one shared {@link Chunker} delegate held by the private ChunkerSingleton enum.
+ * Normally I would use composition and a singleton, but here extension is done for @ConfigurationParameter discovery. Made a singleton mostly for model memory.
+ * NOTE(review): the singleton's initialize/process are inherited from ThreadSafeWrapper, which is not shown here; presumably it synchronizes on getLock() - confirm.
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 11/20/2017
+ */
+@PipeBitInfo(
+ name = "Thread safe Chunker",
+ description = "Annotator that generates chunks of any kind as specified by the chunker model and the chunk creator.",
+ role = PipeBitInfo.Role.ANNOTATOR,
+ dependencies = { PipeBitInfo.TypeProduct.SENTENCE, PipeBitInfo.TypeProduct.BASE_TOKEN },
+ products = { PipeBitInfo.TypeProduct.CHUNK }
+)
+final public class ThreadSafeChunker extends Chunker {
+
+ static private final Logger LOGGER = Logger.getLogger( "ThreadSafeChunker" ); // NOTE(review): not referenced anywhere else in this class
+
+ /**
+ * {@inheritDoc} This override only hands the context to the shared ChunkerSingleton; it does not call super.initialize on this instance.
+ */
+ @Override
+ public void initialize( final UimaContext context ) throws ResourceInitializationException {
+ ChunkerSingleton.getInstance().initialize( context );
+ }
+
+ /**
+ * {@inheritDoc} This override forwards the JCas to the shared ChunkerSingleton instead of processing it on this instance.
+ */
+ @Override
+ public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+ ChunkerSingleton.getInstance().process( jCas );
+ }
+
+ /**
+ * @return an engine description for ThreadSafeChunker with no parameters set, i.e. a chunker using a default model
+ * @throws ResourceInitializationException propagated from AnalysisEngineFactory.createEngineDescription
+ */
+ public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( ThreadSafeChunker.class );
+ }
+
+ /**
+ * @param model a chunker model, passed as the value of Chunker.PARAM_CHUNKER_MODEL_FILE
+ * @return an engine description for ThreadSafeChunker using the given model
+ * @throws ResourceInitializationException propagated from AnalysisEngineFactory.createEngineDescription
+ */
+ public static AnalysisEngineDescription createAnnotatorDescription( final String model ) throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( ThreadSafeChunker.class,
+ Chunker.PARAM_CHUNKER_MODEL_FILE, model );
+ }
+
+ // Enum singleton that owns the one Chunker delegate and the lock object; both are exposed through the ThreadSafeWrapper accessors below.
+ private enum ChunkerSingleton implements ThreadSafeWrapper<Chunker> {
+ INSTANCE;
+
+ static public ChunkerSingleton getInstance() {
+ return INSTANCE;
+ }
+
+ private final Chunker _delegate;
+ private boolean _initialized; // not volatile - NOTE(review): confirm ThreadSafeWrapper only reads/writes it while holding getLock()
+
+ ChunkerSingleton() {
+ _delegate = new Chunker();
+ }
+
+ final private Object LOCK = new Object(); // monitor object returned by getLock()
+
+ @Override
+ public Object getLock() {
+ return LOCK;
+ }
+
+ @Override
+ public Chunker getDelegate() {
+ return _delegate;
+ }
+
+ @Override
+ public boolean isInitialized() {
+ return _initialized;
+ }
+
+ @Override
+ public void setInitialized( final boolean initialized ) {
+ _initialized = initialized;
+ }
+ }
+
+
+}
Modified: ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper (original)
+++ ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper Sun Nov 26 15:56:23 2017
@@ -1,17 +1,17 @@
// Commands and parameters to create a default plaintext document processing pipeline with UMLS lookup
// Load a simple token processing pipeline from another pipeline file
-load DefaultTokenizerPipeline.piper
+load DefaultTokenizerPipeline
// Add non-core annotators
add ContextDependentTokenizerAnnotator
addDescription POSTagger
// Add Chunkers
-load ChunkerSubPipe.piper
+load ChunkerSubPipe
// Default fast dictionary lookup
-load DictionarySubPipe.piper
+load DictionarySubPipe
// Add Cleartk Entity Attribute annotators
-load AttributeCleartkSubPipe.piper
+load AttributeCleartkSubPipe
Copied: ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/TsDefaultFastPipeline.piper (from r1816235, ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/TsDefaultFastPipeline.piper?p2=ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/TsDefaultFastPipeline.piper&p1=ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper&r1=1816235&r2=1816385&rev=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper (original)
+++ ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/TsDefaultFastPipeline.piper Sun Nov 26 15:56:23 2017
@@ -1,17 +1,20 @@
// Commands and parameters to create a default plaintext document processing pipeline with UMLS lookup
+// set the thread count
+threads 3
+
// Load a simple token processing pipeline from another pipeline file
-load DefaultTokenizerPipeline.piper
+load TsDefaultTokenizerPipeline
// Add non-core annotators
add ContextDependentTokenizerAnnotator
addDescription POSTagger
// Add Chunkers
-load ChunkerSubPipe.piper
+load TsChunkerSubPipe
// Default fast dictionary lookup
-load DictionarySubPipe.piper
+load TsDictionarySubPipe
// Add Cleartk Entity Attribute annotators
-load AttributeCleartkSubPipe.piper
+load TsAttributeCleartkSubPipe
Added: ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/concurrent/ThreadSafeConstituencyParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/concurrent/ThreadSafeConstituencyParser.java?rev=1816385&view=auto
==============================================================================
--- ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/concurrent/ThreadSafeConstituencyParser.java (added)
+++ ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/concurrent/ThreadSafeConstituencyParser.java Sun Nov 26 15:56:23 2017
@@ -0,0 +1,100 @@
+package org.apache.ctakes.constituency.parser.concurrent;
+
+import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
+import org.apache.ctakes.core.concurrent.ThreadSafeWrapper;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.util.logging.Logger;
+
+/**
+ * ConstituencyParser whose initialize and process calls are forwarded to one shared {@link ConstituencyParser} delegate held by the private CpSingleton enum.
+ * Normally I would use composition and a singleton, but here extension is done for @ConfigurationParameter discovery. Made a singleton mostly for model memory.
+ * NOTE(review): the singleton's initialize/process are inherited from ThreadSafeWrapper, which is not shown here; presumably it synchronizes on getLock() - confirm.
+ * NOTE(review): unlike ThreadSafeChunker, this class is not final and its PipeBitInfo annotation sets no role - confirm both are intended.
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 11/20/2017
+ */
+@PipeBitInfo(
+ name = "Thread safe Constituency Parser",
+ description = "Adds Terminal Treebank Nodes, necessary for Coreference Markables.",
+ dependencies = { PipeBitInfo.TypeProduct.DOCUMENT_ID, PipeBitInfo.TypeProduct.SENTENCE },
+ products = { PipeBitInfo.TypeProduct.TREE_NODE }
+)
+public class ThreadSafeConstituencyParser extends ConstituencyParser {
+
+ static private final Logger LOGGER = Logger.getLogger( "ThreadSafeConstituencyParser" ); // NOTE(review): java.util.logging.Logger here, log4j in ThreadSafeChunker; not referenced anywhere else in this class
+
+ /**
+ * {@inheritDoc} This override only hands the context to the shared CpSingleton; it does not call super.initialize on this instance.
+ */
+ @Override
+ public void initialize( final UimaContext context ) throws ResourceInitializationException {
+ CpSingleton.getInstance().initialize( context );
+ }
+
+ /**
+ * {@inheritDoc} This override forwards the JCas to the shared CpSingleton instead of processing it on this instance.
+ */
+ @Override
+ public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+ CpSingleton.getInstance().process( jCas );
+ }
+ // Builds an engine description for this class with ConstituencyParser.PARAM_MODEL_FILENAME set to modelPath.
+ public static AnalysisEngineDescription createAnnotatorDescription( final String modelPath )
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription(
+ ThreadSafeConstituencyParser.class,
+ ConstituencyParser.PARAM_MODEL_FILENAME, modelPath );
+ }
+ // Builds an engine description for this class with no parameters set.
+ public static AnalysisEngineDescription createAnnotatorDescription()
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( ThreadSafeConstituencyParser.class );
+ }
+
+ // Enum singleton that owns the one ConstituencyParser delegate and the lock object; both are exposed through the ThreadSafeWrapper accessors below.
+ private enum CpSingleton implements ThreadSafeWrapper<ConstituencyParser> {
+ INSTANCE;
+
+ static public CpSingleton getInstance() {
+ return INSTANCE;
+ }
+
+ private final ConstituencyParser _delegate;
+ private boolean _initialized; // not volatile - NOTE(review): confirm ThreadSafeWrapper only reads/writes it while holding getLock()
+
+ CpSingleton() {
+ _delegate = new ConstituencyParser();
+ }
+
+ final private Object LOCK = new Object(); // monitor object returned by getLock()
+
+ @Override
+ public Object getLock() {
+ return LOCK;
+ }
+
+ @Override
+ public ConstituencyParser getDelegate() {
+ return _delegate;
+ }
+
+ @Override
+ public boolean isInitialized() {
+ return _initialized;
+ }
+
+ @Override
+ public void setInitialized( final boolean initialized ) {
+ _initialized = initialized;
+ }
+ }
+
+
+}
Modified: ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/AdvancedTokenizerPipeline.piper
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/AdvancedTokenizerPipeline.piper?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/AdvancedTokenizerPipeline.piper (original)
+++ ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/AdvancedTokenizerPipeline.piper Sun Nov 26 15:56:23 2017
@@ -4,11 +4,11 @@
add BsvRegexSectionizer
// The sentence detector needs our custom model path, otherwise default values are used.
-addLogged SentenceDetectorAnnotatorBIO classifierJarPath=/org/apache/ctakes/core/sentdetect/model.jar
+//addLogged SentenceDetectorAnnotatorBIO classifierJarPath=/org/apache/ctakes/core/sentdetect/model.jar
// The SentenceDetectorAnnotatorBIO is a "lumper" that works well for notes in which end of line does not indicate a sentence.
// If that is not your case, then you may get better results using the more standard SentenceDetector
-// add SentenceDetector
+ add SentenceDetector
// By default, paragraphs are parsed using empty lines as separators and Part #:
add ParagraphAnnotator
Copied: ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsDefaultTokenizerPipeline.piper (from r1816235, ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/DefaultTokenizerPipeline.piper)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsDefaultTokenizerPipeline.piper?p2=ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsDefaultTokenizerPipeline.piper&p1=ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/DefaultTokenizerPipeline.piper&r1=1816235&r2=1816385&rev=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/DefaultTokenizerPipeline.piper (original)
+++ ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsDefaultTokenizerPipeline.piper Sun Nov 26 15:56:23 2017
@@ -1,5 +1,5 @@
// Commands and parameters to create a default token processing pipeline
add SimpleSegmentAnnotator
-add SentenceDetector
+add concurrent.ThreadSafeSentenceDetector
add TokenizerAnnotatorPTB
Copied: ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsFullTokenizerPipeline.piper (from r1815683, ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/AdvancedTokenizerPipeline.piper)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsFullTokenizerPipeline.piper?p2=ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsFullTokenizerPipeline.piper&p1=ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/AdvancedTokenizerPipeline.piper&r1=1815683&r2=1816385&rev=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/AdvancedTokenizerPipeline.piper (original)
+++ ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/pipeline/TsFullTokenizerPipeline.piper Sun Nov 26 15:56:23 2017
@@ -4,11 +4,11 @@
add BsvRegexSectionizer
// The sentence detector needs our custom model path, otherwise default values are used.
-addLogged SentenceDetectorAnnotatorBIO classifierJarPath=/org/apache/ctakes/core/sentdetect/model.jar
+//add concurrent.ThreadSafeSentenceDetectorBio classifierJarPath=/org/apache/ctakes/core/sentdetect/model.jar
// The SentenceDetectorAnnotatorBIO is a "lumper" that works well for notes in which end of line does not indicate a sentence.
// If that is not your case, then you may get better results using the more standard SentenceDetector
-// add SentenceDetector
+add concurrent.ThreadSafeSentenceDetector
// By default, paragraphs are parsed using empty lines as separators and Part #:
add ParagraphAnnotator
Modified: ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv (original)
+++ ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv Sun Nov 26 15:56:23 2017
@@ -46,7 +46,7 @@ History Source||^[\t ]*(?:HISTORY|HX) (?
Current Pregnancy||^[\t ]*CURRENT PREGNANCY[\t ]*:?[\t ]*$
Special Procedures||^[\t ]*SPECIAL PROCEDURES?[\t ]*:?[\t ]*$
Operative Findings||^[\t ]*OPERATIVE FINDINGS?[\t ]*:?[\t ]*$
-Fluid Balance||^[\t ]*(?:FLUID BALANCE)|(?:I(?:NPUT)? ?\/? ?O(?:UTPUT)?)[\t ]*:?[\t ]*$
+Fluid Balance||^[\t ]*(?:(?:FLUID BALANCE)|(?:I(?:NPUT)? ?\/? ?O(?:UTPUT)?))[\t ]*:?[\t ]*$
Blood Pressure||^[\t ]*(?:BLOOD PRESSURE|BP)[\t ]*:?[\t ]*$
Post Procedure Diagnosis||^[\t ]*POST\-?(?:PROCEDURE|OP|OPERATIVE) DIAGNOSIS[\t ]*:?[\t ]*$
Final Diagnosis||^[\t ]*FINAL DIAGNOSIS[\t ]*:?[\t ]*$
Modified: ctakes/trunk/ctakes-core/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/pom.xml?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/pom.xml (original)
+++ ctakes/trunk/ctakes-core/pom.xml Sun Nov 26 15:56:23 2017
@@ -128,5 +128,10 @@
<groupId>org.cleartk</groupId>
<artifactId>cleartk-ml</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.uima</groupId>
+ <artifactId>uimafit-cpe</artifactId>
+ <version>2.3.0</version>
+ </dependency>
</dependencies>
</project>
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java Sun Nov 26 15:56:23 2017
@@ -44,7 +44,7 @@ public class BsvRegexSectionizer extends
* {@inheritDoc}
*/
@Override
- protected void loadSections() throws ResourceInitializationException {
+ synchronized protected void loadSections() throws ResourceInitializationException {
if ( _sectionTypesPath == null ) {
LOGGER.error( "No " + SECTION_TYPES_DESC );
return;
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/RegexSectionizer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/RegexSectionizer.java?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/RegexSectionizer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/RegexSectionizer.java Sun Nov 26 15:56:23 2017
@@ -15,7 +15,6 @@ import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import java.util.*;
-import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -50,7 +49,7 @@ abstract public class RegexSectionizer e
static private final String DEFAULT_SEGMENT_ID = "SIMPLE_SEGMENT";
static private final String SECTION_NAME_EX = "SECTION_NAME";
static public final String DIVIDER_LINE_NAME = "DIVIDER_LINE";
- static private final Pattern DIVIDER_LINE_PATTERN = Pattern.compile( "^[_\\-=]{4,}\\r?\\n" );
+ static private final Pattern DIVIDER_LINE_PATTERN = Pattern.compile( "^[\\t ]*[_\\-=]{4,}[\\t ]*$" );
private enum TagType {
HEADER, FOOTER, DIVIDER
@@ -108,8 +107,9 @@ abstract public class RegexSectionizer e
}
- // ugly, and I wouldn't normally do this, but ...
- static private final Map<String, SectionType> _sectionTypes = new ConcurrentHashMap<>();
+ static private final Object SECTION_TYPE_LOCK = new Object();
+ static private final Map<String, SectionType> _sectionTypes = new HashMap<>();
+ static private volatile boolean _sectionsLoaded = false;
static protected void addSectionType( final SectionType sectionType ) {
_sectionTypes.put( sectionType.__name, sectionType );
@@ -125,7 +125,12 @@ abstract public class RegexSectionizer e
@Override
public void initialize( final UimaContext context ) throws ResourceInitializationException {
super.initialize( context );
- loadSections();
+ synchronized (SECTION_TYPE_LOCK) {
+ if ( !_sectionsLoaded ) {
+ loadSections();
+ _sectionsLoaded = true;
+ }
+ }
}
/**
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/html/HtmlTextWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/html/HtmlTextWriter.java?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/html/HtmlTextWriter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/html/HtmlTextWriter.java Sun Nov 26 15:56:23 2017
@@ -34,6 +34,7 @@ import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@@ -83,7 +84,7 @@ final public class HtmlTextWriter extend
static private final String CSS_FILENAME = "ctakes.pretty.css";
static private final String JS_FILENAME = "ctakes.pretty.js";
- private final Collection<String> _usedDirectories = new HashSet<>();
+ static private final Collection<String> _usedDirectories = ConcurrentHashMap.newKeySet();
/**
* {@inheritDoc}
@@ -93,13 +94,14 @@ final public class HtmlTextWriter extend
final String outputDir,
final String documentId,
final String fileName ) throws IOException {
- if ( _usedDirectories.add( outputDir ) ) {
- final String cssPath = outputDir + '/' + CSS_FILENAME;
- CssWriter.writeCssFile( cssPath );
- final String jsPath = outputDir + '/' + JS_FILENAME;
- JsWriter.writeJsFile( jsPath );
+ synchronized (_usedDirectories) {
+ if ( _usedDirectories.add( outputDir ) ) {
+ final String cssPath = outputDir + '/' + CSS_FILENAME;
+ CssWriter.writeCssFile( cssPath );
+ final String jsPath = outputDir + '/' + JS_FILENAME;
+ JsWriter.writeJsFile( jsPath );
+ }
}
-
final File htmlFile = new File( outputDir, fileName + FILE_EXTENSION );
LOGGER.info( "Writing HTML to " + htmlFile.getPath() + " ..." );
try ( final BufferedWriter writer = new BufferedWriter( new FileWriter( htmlFile ) ) ) {
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetector.java?rev=1816385&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetector.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetector.java Sun Nov 26 15:56:23 2017
@@ -0,0 +1,92 @@
+package org.apache.ctakes.core.concurrent;
+
+import org.apache.ctakes.core.ae.SentenceDetector;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/**
+ * Sentence detector that forwards its work to one shared {@link SentenceDetector}.
+ * Every instance of this class delegates initialize and process to the same singleton,
+ * which was made a singleton mostly for model memory.
+ * Extension (rather than composition around the singleton) is used for @ConfigurationParameter discovery.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 11/20/2017
+ */
+@PipeBitInfo(
+      name = "Thread Safe Sentence Detector",
+      description = "Annotates Sentences based upon an OpenNLP model.",
+      dependencies = { PipeBitInfo.TypeProduct.SECTION },
+      products = { PipeBitInfo.TypeProduct.SENTENCE }
+)
+final public class ThreadSafeSentenceDetector extends SentenceDetector {
+
+   static private final Logger LOGGER = Logger.getLogger( "ThreadedSentenceDetector" );
+
+   /**
+    * @return a sentence detector
+    * @throws ResourceInitializationException -
+    */
+   public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+      return AnalysisEngineFactory.createEngineDescription( ThreadSafeSentenceDetector.class );
+   }
+
+   /**
+    * Hands the context to the shared detector, which only initializes itself the first time.
+    * {@inheritDoc}
+    */
+   @Override
+   public void initialize( final UimaContext context ) throws ResourceInitializationException {
+      final SdSingleton shared = SdSingleton.getInstance();
+      shared.initialize( context );
+   }
+
+   /**
+    * Processes the cas with the shared detector while holding its lock.
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+      final SdSingleton shared = SdSingleton.getInstance();
+      shared.process( jCas );
+   }
+
+   /**
+    * Holder of the single SentenceDetector, its lock and its initialization flag.
+    * The locking itself is implemented by the default methods of {@link ThreadSafeWrapper}.
+    */
+   private enum SdSingleton implements ThreadSafeWrapper<SentenceDetector> {
+      INSTANCE;
+
+      private final Object _lock = new Object();
+      private final SentenceDetector _delegate = new SentenceDetector();
+      private boolean _initialized;
+
+      static public SdSingleton getInstance() {
+         return INSTANCE;
+      }
+
+      @Override
+      public Object getLock() {
+         return _lock;
+      }
+
+      @Override
+      public SentenceDetector getDelegate() {
+         return _delegate;
+      }
+
+      @Override
+      public boolean isInitialized() {
+         return _initialized;
+      }
+
+      @Override
+      public void setInitialized( final boolean initialized ) {
+         _initialized = initialized;
+      }
+   }
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetectorBio.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetectorBio.java?rev=1816385&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetectorBio.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeSentenceDetectorBio.java Sun Nov 26 15:56:23 2017
@@ -0,0 +1,111 @@
+package org.apache.ctakes.core.concurrent;
+
+import org.apache.ctakes.core.ae.SentenceDetectorAnnotatorBIO;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
+
+/**
+ * B I O sentence detector that forwards its work to one shared {@link SentenceDetectorAnnotatorBIO}.
+ * Every instance of this class delegates initialize and process to the same singleton.
+ * Extension (rather than composition around the singleton) is used for @ConfigurationParameter discovery.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 11/19/2017
+ */
+@PipeBitInfo(
+      name = "Thread Safe Sentence Detector BIO",
+      description = "Thread safe sentence detector that uses B I O for determination. " +
+                    "Useful for documents in which newlines may not indicate sentence boundaries.",
+      role = PipeBitInfo.Role.ANNOTATOR,
+      dependencies = PipeBitInfo.TypeProduct.SECTION,
+      products = PipeBitInfo.TypeProduct.SENTENCE
+)
+final public class ThreadSafeSentenceDetectorBio extends SentenceDetectorAnnotatorBIO {
+
+   static private final Logger LOGGER = Logger.getLogger( "ThreadSafeSentenceDetectorBio" );
+
+   /**
+    * @return a sentence detector using a default model
+    * @throws ResourceInitializationException -
+    */
+   public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+      final String defaultModelPath = "/org/apache/ctakes/core/sentdetect/model.jar";
+      return createAnnotatorDescription( defaultModelPath );
+   }
+
+   /**
+    * @param modelPath model using b i o tagging
+    * @return a sentence detector using the given model
+    * @throws ResourceInitializationException -
+    */
+   public static AnalysisEngineDescription createAnnotatorDescription( final String modelPath )
+         throws ResourceInitializationException {
+      return AnalysisEngineFactory.createEngineDescription(
+            ThreadSafeSentenceDetectorBio.class,
+            SentenceDetectorAnnotatorBIO.PARAM_IS_TRAINING, false,
+            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelPath,
+            SentenceDetectorAnnotatorBIO.PARAM_FEAT_CONFIG, SentenceDetectorAnnotatorBIO.FEAT_CONFIG.CHAR );
+   }
+
+   /**
+    * Hands the context to the shared detector, which only initializes itself the first time.
+    * {@inheritDoc}
+    */
+   @Override
+   public void initialize( final UimaContext context ) throws ResourceInitializationException {
+      final SdBioSingleton shared = SdBioSingleton.getInstance();
+      shared.initialize( context );
+   }
+
+   /**
+    * Processes the cas with the shared detector while holding its lock.
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+      final SdBioSingleton shared = SdBioSingleton.getInstance();
+      shared.process( jCas );
+   }
+
+   /**
+    * Holder of the single SentenceDetectorAnnotatorBIO, its lock and its initialization flag.
+    * The locking itself is implemented by the default methods of {@link ThreadSafeWrapper}.
+    */
+   private enum SdBioSingleton implements ThreadSafeWrapper<SentenceDetectorAnnotatorBIO> {
+      INSTANCE;
+
+      private final Object _lock = new Object();
+      private final SentenceDetectorAnnotatorBIO _delegate = new SentenceDetectorAnnotatorBIO();
+      private boolean _initialized;
+
+      static public SdBioSingleton getInstance() {
+         return INSTANCE;
+      }
+
+      @Override
+      public Object getLock() {
+         return _lock;
+      }
+
+      @Override
+      public SentenceDetectorAnnotatorBIO getDelegate() {
+         return _delegate;
+      }
+
+      @Override
+      public boolean isInitialized() {
+         return _initialized;
+      }
+
+      @Override
+      public void setInitialized( final boolean initialized ) {
+         _initialized = initialized;
+      }
+   }
+
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeWrapper.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeWrapper.java?rev=1816385&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeWrapper.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/concurrent/ThreadSafeWrapper.java Sun Nov 26 15:56:23 2017
@@ -0,0 +1,172 @@
+package org.apache.ctakes.core.concurrent;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.analysis_engine.ResultSpecification;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.internal.ExtendedLogger;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceConfigurationException;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/**
+ * Mix-in that lets an enum singleton act as an AnalysisComponent around one wrapped annotator.
+ * Implementers only supply the lock, the delegate and the initialized flag; the jdk 8+ default methods below
+ * forward every AnalysisComponent call to the delegate, so no boilerplate is needed in each enum.
+ * Calls that forward work to the delegate are made while holding the lock returned by {@link #getLock()}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 11/20/2017
+ */
+public interface ThreadSafeWrapper<AE extends JCasAnnotator_ImplBase> extends AnalysisComponent {
+
+   /**
+    * @return an object upon which to lock the ae
+    */
+   Object getLock();
+
+   /**
+    * @return The annotator wrapped by this object
+    */
+   AE getDelegate();
+
+   /**
+    * @return true if the delegate has already been initialized through this wrapper
+    */
+   boolean isInitialized();
+
+   /**
+    * @param initialized true if the delegate has been initialized
+    */
+   void setInitialized( final boolean initialized );
+
+   /**
+    * Initializes the delegate with the given context the first time this is called.
+    * Later calls find the initialized flag set and return without touching the delegate.
+    * The flag is only set after the delegate's initialize returned normally.
+    */
+   @Override
+   default void initialize( final UimaContext context ) throws ResourceInitializationException {
+      synchronized (getLock()) {
+         if ( isInitialized() ) {
+            return;
+         }
+         getDelegate().initialize( context );
+         setInitialized( true );
+      }
+   }
+
+   /**
+    * Processes the cas with the delegate while holding the lock,
+    * so a caller waits until any other locked call on this wrapper has finished.
+    */
+   default void process( final JCas jCas ) throws AnalysisEngineProcessException {
+      synchronized (getLock()) {
+         final AE annotator = getDelegate();
+         annotator.process( jCas );
+      }
+   }
+
+   /**
+    * from uimafit JCasAnnotator_ImplBase
+    *
+    * @return the logger of the delegate
+    */
+   default ExtendedLogger getLogger() {
+      final AE annotator = getDelegate();
+      return annotator.getLogger();
+   }
+
+   /**
+    * Forwarded to the delegate under the lock.
+    * {@inheritDoc}
+    */
+   @Override
+   default void reconfigure() throws ResourceConfigurationException, ResourceInitializationException {
+      synchronized (getLock()) {
+         final AE annotator = getDelegate();
+         annotator.reconfigure();
+      }
+   }
+
+   /**
+    * Forwarded to the delegate under the lock.
+    * {@inheritDoc}
+    */
+   @Override
+   default void batchProcessComplete() throws AnalysisEngineProcessException {
+      synchronized (getLock()) {
+         final AE annotator = getDelegate();
+         annotator.batchProcessComplete();
+      }
+   }
+
+   /**
+    * Forwarded to the delegate under the lock.
+    * {@inheritDoc}
+    */
+   @Override
+   default void collectionProcessComplete() throws AnalysisEngineProcessException {
+      synchronized (getLock()) {
+         final AE annotator = getDelegate();
+         annotator.collectionProcessComplete();
+      }
+   }
+
+   /**
+    * Forwarded to the delegate under the lock.  The initialized flag is left as it is.
+    * {@inheritDoc}
+    */
+   @Override
+   default void destroy() {
+      synchronized (getLock()) {
+         final AE annotator = getDelegate();
+         annotator.destroy();
+      }
+   }
+
+   /**
+    * Forwarded to the delegate under the lock.
+    * {@inheritDoc}
+    */
+   @Override
+   default void process( final AbstractCas aCas ) throws AnalysisEngineProcessException {
+      synchronized (getLock()) {
+         final AE annotator = getDelegate();
+         annotator.process( aCas );
+      }
+   }
+
+   /**
+    * Forwarded to the delegate under the lock.
+    * {@inheritDoc}
+    */
+   @Override
+   default boolean hasNext() throws AnalysisEngineProcessException {
+      synchronized (getLock()) {
+         final AE annotator = getDelegate();
+         return annotator.hasNext();
+      }
+   }
+
+   /**
+    * Forwarded to the delegate under the lock.
+    * {@inheritDoc}
+    */
+   @Override
+   default AbstractCas next() throws AnalysisEngineProcessException {
+      synchronized (getLock()) {
+         final AE annotator = getDelegate();
+         return annotator.next();
+      }
+   }
+
+   /**
+    * Forwarded to the delegate without locking.
+    * {@inheritDoc}
+    */
+   @Override
+   default Class<JCas> getRequiredCasInterface() {
+      final AE annotator = getDelegate();
+      return annotator.getRequiredCasInterface();
+   }
+
+   /**
+    * Forwarded to the delegate without locking.
+    * {@inheritDoc}
+    */
+   @Override
+   default int getCasInstancesRequired() {
+      final AE annotator = getDelegate();
+      return annotator.getCasInstancesRequired();
+   }
+
+   /**
+    * Forwarded to the delegate under the lock.
+    * {@inheritDoc}
+    */
+   @Override
+   default void setResultSpecification( final ResultSpecification resultSpec ) {
+      synchronized (getLock()) {
+         final AE annotator = getDelegate();
+         annotator.setResultSpecification( resultSpec );
+      }
+   }
+
+}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java Sun Nov 26 15:56:23 2017
@@ -21,6 +21,9 @@ import org.apache.uima.util.ProgressImpl
import java.io.*;
import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.*;
import java.util.stream.Collectors;
@@ -300,6 +303,7 @@ final public class FileTreeReader extend
* @throws IOException if the file could not be read
*/
private String readFile( final File file ) throws IOException {
+ LOGGER.info( "Reading " + file.getPath() );
try {
return readByPath( file );
} catch ( IOException ioE ) {
@@ -324,12 +328,19 @@ final public class FileTreeReader extend
return stream.collect( Collectors.joining( "\n" ) );
}
} else {
- try ( Stream<String> stream = Files.lines( file.toPath() ) ) {
- return stream.collect( Collectors.joining( "\n" ) );
- }
+ return safeReadByPath( file );
+// try ( Stream<String> stream = Files.lines( file.toPath() ) ) {
+// return stream.collect( Collectors.joining( "\n" ) );
+// }
}
}
+   /**
+    * Reads the whole file as UTF-8 text without failing on bytes that are not valid UTF-8.
+    * The decoder is told to ignore malformed input, and the lines that were read are joined with '\n'.
+    *
+    * @param file file to read
+    * @return text of the file with a single '\n' between consecutive lines
+    * @throws IOException if the file could not be opened
+    */
+   static private String safeReadByPath( final File file ) throws IOException {
+      final CharsetDecoder lenientUtf8 = StandardCharsets.UTF_8.newDecoder()
+                                                               .onMalformedInput( CodingErrorAction.IGNORE );
+      try ( InputStream fileBytes = Files.newInputStream( file.toPath() );
+            BufferedReader textReader = new BufferedReader( new InputStreamReader( fileBytes, lenientUtf8 ) ) ) {
+         return textReader.lines().collect( Collectors.joining( "\n" ) );
+      }
+   }
/**
* Reads file using buffered input stream
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipeBitLocator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipeBitLocator.java?rev=1816385&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipeBitLocator.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipeBitLocator.java Sun Nov 26 15:56:23 2017
@@ -0,0 +1,287 @@
+package org.apache.ctakes.core.pipeline;
+
+
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.stream.Collectors;
+
+/**
+ * Utility methods to find analysis components and collection readers without any package specified.
+ * A name is first tried exactly as given.  If no such class exists it is treated as a simple class name and is
+ * searched for in the user packages, in the order in which they were added, and then in the known ctakes packages.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 11/18/2017
+ */
+public enum PipeBitLocator {
+   INSTANCE;
+
+   static public PipeBitLocator getInstance() {
+      return INSTANCE;
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "PipeBitFinder" );
+   static private final Object[] EMPTY_OBJECT_ARRAY = new Object[ 0 ];
+   // Sub-packages tried, in order, below each ctakes package.  The empty entry is the package itself.
+   static private final String[] COMPONENT_SUB_PACKAGES = { ".ae", ".cc", "" };
+   static private final String[] READER_SUB_PACKAGES = { ".cr", "" };
+   // Names of the static factory methods tried, in order, by createDescription.
+   static private final String[] DESCRIPTION_METHODS = { "createAnnotatorDescription", "createEngineDescription" };
+   static private final String[] CTAKES_PACKAGES
+         = { "core",
+             "contexttokenizer",
+             "postagger",
+             "chunker",
+             "dictionary.lookup.fast",
+             "assertion",
+             "dictionary.lookup2",
+             "clinicalpipeline",
+             "clinical.pipeline",
+             "constituency.parser",
+             "lvg",
+             "relationextractor",
+             "coreference",
+             "dependency.parser",
+             "temporal",
+             // A java package name cannot contain a hyphen, so "drug-ner" could never be resolved.
+             "drugner",
+             "necontexts",
+             "preprocessor",
+             "sideeffect",
+             "smokingstatus",
+             "dictionary.lookup",
+             "template.filler" };
+
+
+   // Insertion ordered so that user packages are always searched in the order in which they were added.
+   private final Collection<String> _userPackages = new LinkedHashSet<>();
+
+   /**
+    * @return fully-specified names of the known ctakes packages, in search order
+    */
+   public Collection<String> getCtakesPackages() {
+      return Arrays.stream( CTAKES_PACKAGES )
+                   .map( p -> "org.apache.ctakes." + p )
+                   .collect( Collectors.toList() );
+   }
+
+   /**
+    * @return unmodifiable view of the user packages or directories, in the order in which they were added
+    */
+   public Collection<String> getUserPackages() {
+      return Collections.unmodifiableCollection( _userPackages );
+   }
+
+   /**
+    * Add some user package or directory to the known path.  Null and blank paths are ignored.
+    *
+    * @param packagePath user package or directory
+    */
+   public void addUserPackage( final String packagePath ) {
+      if ( packagePath == null || packagePath.trim().isEmpty() ) {
+         return;
+      }
+      _userPackages.add( packagePath );
+   }
+
+   /**
+    * @param className fully-specified or simple name of an ae or cc component class
+    * @return discovered class for ae or cc
+    * @throws ResourceInitializationException if the class could not be found or is not an AnalysisComponent
+    */
+   public Class<? extends AnalysisComponent> getComponentClass( final String className ) throws
+                                                                                        ResourceInitializationException {
+      Class<?> componentClass;
+      try {
+         componentClass = Class.forName( className );
+      } catch ( ClassNotFoundException cnfE ) {
+         // Not a fully-specified name, search the known packages for a simple name.
+         componentClass = getPackagedComponent( className );
+      }
+      if ( componentClass == null ) {
+         throw new ResourceInitializationException(
+               "No Analysis Component found for " + className, EMPTY_OBJECT_ARRAY );
+      }
+      assertClassType( componentClass, AnalysisComponent.class );
+      return componentClass.asSubclass( AnalysisComponent.class );
+   }
+
+   /**
+    * Searches user packages first, then for each ctakes package its .ae and .cc sub-packages and the package itself.
+    *
+    * @param className simple name of an ae or cc component class
+    * @return discovered class for ae or cc, or null if there is none
+    */
+   private Class<? extends AnalysisComponent> getPackagedComponent( final String className ) {
+      return getPackagedClass( className, AnalysisComponent.class, COMPONENT_SUB_PACKAGES );
+   }
+
+   /**
+    * @param className fully-specified or simple name of a cr Collection Reader class
+    * @return a class for the reader
+    * @throws ResourceInitializationException if the class could not be found or is not a CollectionReader
+    */
+   public Class<? extends CollectionReader> getReaderClass( final String className )
+         throws ResourceInitializationException {
+      Class<?> readerClass;
+      try {
+         readerClass = Class.forName( className );
+      } catch ( ClassNotFoundException cnfE ) {
+         // Not a fully-specified name, search the known packages for a simple name.
+         readerClass = getPackagedReader( className );
+      }
+      if ( readerClass == null ) {
+         throw new ResourceInitializationException( "No Collection Reader found for " + className, EMPTY_OBJECT_ARRAY );
+      }
+      assertClassType( readerClass, CollectionReader.class );
+      return readerClass.asSubclass( CollectionReader.class );
+   }
+
+   /**
+    * Searches user packages first, then for each ctakes package its .cr sub-package and the package itself.
+    *
+    * @param className simple name of a cr Collection Reader class
+    * @return discovered class for a cr, or null if there is none
+    */
+   private Class<? extends CollectionReader> getPackagedReader( final String className ) {
+      return getPackagedClass( className, CollectionReader.class, READER_SUB_PACKAGES );
+   }
+
+   /**
+    * @param className       simple name for class
+    * @param wantedClassType desired superclass type
+    * @param subPackages     suffixes appended, in order, to each ctakes package name
+    * @return first class of the wanted type found in the user packages or the ctakes packages, or null
+    */
+   private <T> Class<? extends T> getPackagedClass( final String className, final Class<T> wantedClassType,
+                                                    final String[] subPackages ) {
+      for ( String packageName : _userPackages ) {
+         final Class<? extends T> userClass = getPackagedClass( packageName, className, wantedClassType );
+         if ( userClass != null ) {
+            return userClass;
+         }
+      }
+      for ( String packageName : getCtakesPackages() ) {
+         for ( String subPackage : subPackages ) {
+            final Class<? extends T> ctakesClass
+                  = getPackagedClass( packageName + subPackage, className, wantedClassType );
+            if ( ctakesClass != null ) {
+               return ctakesClass;
+            }
+         }
+      }
+      return null;
+   }
+
+   /**
+    * @param packageName     possible package for class
+    * @param className       simple name for class
+    * @param wantedClassType desired superclass type
+    * @return discovered class or null if no proper class was discovered
+    */
+   static private <T> Class<? extends T> getPackagedClass( final String packageName, final String className,
+                                                           final Class<T> wantedClassType ) {
+      try {
+         final Class<?> classType = Class.forName( packageName + "." + className );
+         if ( isClassType( classType, wantedClassType ) ) {
+            return classType.asSubclass( wantedClassType );
+         }
+      } catch ( ClassNotFoundException cnfE ) {
+         // Not in this package.  The caller tries the next candidate package.
+      }
+      return null;
+   }
+
+   /**
+    * This requires that the component class has a public static createAnnotatorDescription or
+    * createEngineDescription method whose parameter types match the given values.
+    * createAnnotatorDescription is preferred when both exist.
+    *
+    * @param className component class for which a descriptor should be created
+    * @param values    optional parameter values for the descriptor creator
+    * @return a description generated for the component
+    * @throws ResourceInitializationException if anything went wrong with finding the class or the method,
+    *                                         or invoking the method to get an AnalysisEngineDescription
+    */
+   public AnalysisEngineDescription createDescription( final String className, final Object... values )
+         throws ResourceInitializationException {
+      final Class<? extends AnalysisComponent> componentClass = getComponentClass( className );
+      // A null varargs array is treated the same as no parameter values.
+      final Object[] parameters = values == null ? EMPTY_OBJECT_ARRAY : values;
+      final Method method = getDescriptionMethod( componentClass, className, parameters );
+      try {
+         final Object invocation = method.invoke( null, parameters );
+         if ( !( invocation instanceof AnalysisEngineDescription ) ) {
+            // invocation may be null, so its class cannot be asked for unconditionally.
+            final String returned = invocation == null ? "null" : "an " + invocation.getClass().getName();
+            final String message = method.getName() + " in " + className + " returned "
+                                   + returned + " not an AnalysisEngineDescription";
+            LOGGER.error( message );
+            throw new ResourceInitializationException( new IllegalStateException( message ) );
+         }
+         return (AnalysisEngineDescription) invocation;
+      } catch ( IllegalAccessException | InvocationTargetException multE ) {
+         LOGGER.error( "Could not invoke " + method.getName() + " on " + className );
+         throw new ResourceInitializationException( multE );
+      }
+   }
+
+   /**
+    * @param componentClass class in which to look for a description factory method
+    * @param className      name of the class as given by the caller, used for messages
+    * @param values         parameter values that the method must accept
+    * @return the first static method named in DESCRIPTION_METHODS that accepts the given values
+    * @throws ResourceInitializationException if a value is null or no such static method exists
+    */
+   static private Method getDescriptionMethod( final Class<?> componentClass, final String className,
+                                               final Object[] values ) throws ResourceInitializationException {
+      for ( int i = 0; i < values.length; i++ ) {
+         if ( values[ i ] == null ) {
+            // The parameter type of a null value cannot be determined for the method lookup.
+            LOGGER.error( "Parameter value " + i + " for " + className + " is null" );
+            throw new ResourceInitializationException(
+                  new IllegalArgumentException( "Parameter value " + i + " for " + className + " is null" ) );
+         }
+      }
+      final Class<?>[] valueTypes = getValueTypes( values );
+      NoSuchMethodException lastMiss = null;
+      for ( String methodName : DESCRIPTION_METHODS ) {
+         try {
+            final Method method = componentClass.getMethod( methodName, valueTypes );
+            if ( Modifier.isStatic( method.getModifiers() ) ) {
+               return method;
+            }
+            // The method is invoked without an instance, so an instance method is of no use.
+            lastMiss = new NoSuchMethodException( methodName + " in " + className + " is not static" );
+         } catch ( NoSuchMethodException nsmE ) {
+            lastMiss = nsmE;
+         }
+      }
+      LOGGER.error( "No createAnnotatorDescription or createEngineDescription method in " + className );
+      throw new ResourceInitializationException( lastMiss );
+   }
+
+   /**
+    * The java reflection getMethod does not handle autoboxing/unboxing.
+    * So, we assume that Integer and Boolean parameter values will actually be primitives.
+    * Values of any other type keep their own class.  No value may be null.
+    *
+    * @param values parameter value objects
+    * @return parameter value class types, unboxing to primitives where needed
+    */
+   static private Class<?>[] getValueTypes( final Object... values ) {
+      final Class<?>[] classArray = new Class[ values.length ];
+      for ( int i = 0; i < values.length; i++ ) {
+         final Class<?> type = values[ i ].getClass();
+         if ( type.equals( Integer.class ) ) {
+            classArray[ i ] = int.class;
+         } else if ( type.equals( Boolean.class ) ) {
+            classArray[ i ] = boolean.class;
+         } else {
+            classArray[ i ] = type;
+         }
+      }
+      return classArray;
+   }
+
+   /**
+    * @param classType       class type to test
+    * @param wantedClassType wanted class type
+    * @throws ResourceInitializationException if the class type does not extend the wanted class type
+    */
+   static private void assertClassType( final Class<?> classType, final Class<?> wantedClassType )
+         throws ResourceInitializationException {
+      if ( !isClassType( classType, wantedClassType ) ) {
+         throw new ResourceInitializationException(
+               "Not " + wantedClassType.getSimpleName() + " " + classType.getName(), EMPTY_OBJECT_ARRAY );
+      }
+   }
+
+   /**
+    * @param classType       class type to test
+    * @param wantedClassType wanted class type
+    * @return true if the class type extends the wanted class type
+    */
+   static private boolean isClassType( final Class<?> classType, final Class<?> wantedClassType ) {
+      return wantedClassType.isAssignableFrom( classType );
+   }
+
+
+}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java Sun Nov 26 15:56:23 2017
@@ -9,14 +9,18 @@ import org.apache.log4j.Logger;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_component.AnalysisComponent;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.collection.CollectionProcessingEngine;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.collection.CollectionReaderDescription;
+import org.apache.uima.collection.metadata.CpeDescriptorException;
+import org.apache.uima.fit.cpe.CpeBuilder;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
+import org.xml.sax.SAXException;
import java.io.IOException;
import java.util.ArrayList;
@@ -49,7 +53,10 @@ final public class PipelineBuilder {
// Allow the pipeline to be changed even after it has been built once.
private AnalysisEngineDescription _analysisEngineDesc;
private boolean _pipelineChanged;
-
+ private int _threadCount = 1;
+// Full Pipeline, single: 5.53/4.19 4.53/4.03 4.56/4.06 4:45/4.03
+// Full Pipeline, 2 proc: 5.23/3.16 3.59/2.55 4.01/2.55 4.00/2.55
+// Full Pipeline, 3 proc: 4:20/2.49 3:49/2:44 3.46/2.36 3.44/2.43
public PipelineBuilder() {
_aeNameList = new ArrayList<>();
@@ -58,6 +65,7 @@ final public class PipelineBuilder {
_aeEndNameList = new ArrayList<>();
_aeEndViewList = new ArrayList<>();
_descEndList = new ArrayList<>();
+ _threadCount = 1;
}
public void clear() {
@@ -67,6 +75,7 @@ final public class PipelineBuilder {
_aeEndNameList.clear();
_aeEndViewList.clear();
_descEndList.clear();
+ _threadCount = 1;
}
/**
@@ -305,6 +314,25 @@ final public class PipelineBuilder {
ConfigParameterConstants.PARAM_OUTPUTDIR, outputDirectory );
}
+   /**
+    * Set the number of threads with which the pipeline should be run.
+    * A count below 1 is replaced by 1, and a count above the number of available processors is replaced by that
+    * number.  A warning is logged whenever the requested count is replaced.
+    *
+    * @param threadCount requested number of threads
+    * @return this PipelineBuilder
+    */
+   public PipelineBuilder threads( final int threadCount ) {
+      final int coreCount = Runtime.getRuntime().availableProcessors();
+      if ( threadCount < 1 ) {
+         LOGGER.warn( "Thread count (" + threadCount + ") cannot be below 1. Using 1 thread for processing." );
+         _threadCount = 1;
+      } else if ( threadCount > coreCount ) {
+         LOGGER.warn( "Thread count (" + threadCount + ") is greater than core count ("
+                      + coreCount + "). Using core count for processing." );
+         _threadCount = coreCount;
+      } else {
+         _threadCount = threadCount;
+      }
+      return this;
+   }
+
/**
* Initialize a pipeline that can be used repeatedly using {@link #run} and {@link #run(String)}.
* A pipeline can be extended between builds, but the full pipeline will be rebuilt on each call.
@@ -322,8 +350,6 @@ final public class PipelineBuilder {
for ( int i = 0; i < _descEndList.size(); i++ ) {
builder.add( _descEndList.get( i ), _aeEndViewList.get( i ) );
}
-// _descList.forEach( builder::add );
-// _descEndList.forEach( builder::add );
_analysisEngineDesc = builder.createAggregateDescription();
}
_pipelineChanged = false;
@@ -345,7 +371,21 @@ final public class PipelineBuilder {
return this;
}
build();
- SimplePipeline.runPipeline( _readerDesc, _analysisEngineDesc );
+ if ( _threadCount == 1 ) {
+ SimplePipeline.runPipeline( _readerDesc, _analysisEngineDesc );
+ } else {
+ final CpeBuilder cpeBuilder = new CpeBuilder();
+ try {
+ cpeBuilder.setReader( _readerDesc );
+ cpeBuilder.setAnalysisEngine( _analysisEngineDesc );
+ cpeBuilder.setMaxProcessingUnitThreadCount( _threadCount );
+ final CollectionProcessingEngine cpe = cpeBuilder.createCpe( null );
+ cpe.process();
+ } catch ( CpeDescriptorException | SAXException multE ) {
+ LOGGER.error( multE.getMessage(), multE );
+ throw new UIMAException( multE );
+ }
+ }
return this;
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java?rev=1816385&r1=1816384&r2=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java Sun Nov 26 15:56:23 2017
@@ -3,16 +3,12 @@ package org.apache.ctakes.core.pipeline;
import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.util.DotLogger;
import org.apache.log4j.Logger;
import org.apache.uima.UIMAException;
-import org.apache.uima.analysis_component.AnalysisComponent;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.collection.CollectionReader;
-import org.apache.uima.resource.ResourceInitializationException;
import java.io.*;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -54,29 +50,6 @@ final public class PiperFileReader {
static public final String AE_VIEW_NAMES = "AeViews";
- static private final String[] CTAKES_PACKAGES
- = { "core",
- "contexttokenizer",
- "postagger",
- "chunker",
- "dictionary.lookup.fast",
- "assertion",
- "dictionary.lookup2",
- "clinicalpipeline",
- "clinical.pipeline",
- "constituency.parser",
- "lvg",
- "relationextractor",
- "coreference",
- "dependency.parser",
- "temporal",
- "drug-ner",
- "necontexts",
- "preprocessor",
- "sideeffect",
- "smokingstatus",
- "dictionary.lookup",
- "template.filler" };
static private final Object[] EMPTY_OBJECT_ARRAY = new Object[ 0 ];
@@ -91,7 +64,6 @@ final public class PiperFileReader {
private PipelineBuilder _builder;
- private final Collection<String> _userPackages;
private CliOptionals _cliOptionals;
/**
@@ -99,7 +71,6 @@ final public class PiperFileReader {
*/
public PiperFileReader() {
_builder = new PipelineBuilder();
- _userPackages = new ArrayList<>();
}
/**
@@ -109,9 +80,7 @@ final public class PiperFileReader {
* @throws UIMAException if the pipeline cannot be loaded
*/
public PiperFileReader( final String filePath ) throws UIMAException {
- _builder = new PipelineBuilder();
- _userPackages = new ArrayList<>();
- loadPipelineFile( filePath );
+ this( filePath, null );
}
/**
@@ -123,9 +92,13 @@ final public class PiperFileReader {
*/
public PiperFileReader( final String filePath, final CliOptionals cliOptionals ) throws UIMAException {
_builder = new PipelineBuilder();
- _userPackages = new ArrayList<>();
- setCliOptionals( cliOptionals );
- loadPipelineFile( filePath );
+ if ( cliOptionals != null ) {
+ setCliOptionals( cliOptionals );
+ }
+ if ( !loadPipelineFile( filePath ) ) {
+ LOGGER.error( "Piper File contained invalid command or parameters, exiting cTAKES." );
+ System.exit( 1 );
+ }
}
public void setCliOptionals( final CliOptionals cliOptionals ) {
@@ -133,20 +106,13 @@ final public class PiperFileReader {
}
/**
- * Add some user package or directory to the known path
- * @param packagePath user package or directory
- */
- public void addUserPackage( final String packagePath ) {
- _userPackages.add( packagePath );
- }
-
- /**
* Load a file with command parameter pairs for building a pipeline
*
* @param filePath path to the pipeline command file
*/
public boolean loadPipelineFile( final String filePath ) throws UIMAException {
- try ( final BufferedReader reader = getPiperReader( filePath ) ) {
+ LOGGER.info( "Loading Piper File " + filePath + " ..." );
+ try ( final BufferedReader reader = getPiperReader( filePath ); DotLogger logger = new DotLogger() ) {
String line = reader.readLine();
while ( line != null ) {
parsePipelineLine( line.trim() );
@@ -154,7 +120,6 @@ final public class PiperFileReader {
}
} catch ( IOException ioE ) {
LOGGER.error( "Could not read piper file: " + filePath );
-// return false;
throw new UIMAException( ioE );
}
return true;
@@ -193,7 +158,7 @@ final public class PiperFileReader {
case "load":
return loadPipelineFile( info );
case "package":
- addUserPackage( info );
+ PipeBitLocator.getInstance().addUserPackage( info );
return true;
case "set":
_builder.set( splitParameters( info ) );
@@ -206,9 +171,9 @@ final public class PiperFileReader {
final String[] component_parameters = splitFromParameters( info );
final String component = component_parameters[ 0 ];
final Object[] parameters = splitParameters( component_parameters[ 1 ] );
- _builder.reader( getReaderClass( component ), parameters );
+ _builder.reader( PipeBitLocator.getInstance().getReaderClass( component ), parameters );
} else {
- _builder.reader( getReaderClass( info ) );
+ _builder.reader( PipeBitLocator.getInstance().getReaderClass( info ) );
}
return true;
case "readFiles":
@@ -223,9 +188,9 @@ final public class PiperFileReader {
final String[] component_parameters = splitFromParameters( info );
final String component = component_parameters[ 0 ];
final Object[] parameters = splitParameters( component_parameters[ 1 ] );
- _builder.add( getComponentClass( component ), views, parameters );
+ _builder.add( PipeBitLocator.getInstance().getComponentClass( component ), views, parameters );
} else {
- _builder.add( getComponentClass( info ), views );
+ _builder.add( PipeBitLocator.getInstance().getComponentClass( info ), views );
}
return true;
case "addLogged":
@@ -233,9 +198,9 @@ final public class PiperFileReader {
final String[] component_parameters = splitFromParameters( info );
final String component = component_parameters[ 0 ];
final Object[] parameters = splitParameters( component_parameters[ 1 ] );
- _builder.addLogged( getComponentClass( component ), views, parameters );
+ _builder.addLogged( PipeBitLocator.getInstance().getComponentClass( component ), views, parameters );
} else {
- _builder.addLogged( getComponentClass( info ), views );
+ _builder.addLogged( PipeBitLocator.getInstance().getComponentClass( info ), views );
}
return true;
case "addDescription":
@@ -243,10 +208,10 @@ final public class PiperFileReader {
final String[] descriptor_parameters = splitFromParameters( info );
final String component = descriptor_parameters[ 0 ];
final Object[] values = splitDescriptorValues( descriptor_parameters[ 1 ] );
- final AnalysisEngineDescription description = createDescription( component, values );
+ final AnalysisEngineDescription description = PipeBitLocator.getInstance().createDescription( component, values );
_builder.addDescription( description, views );
} else {
- final AnalysisEngineDescription description = createDescription( info );
+ final AnalysisEngineDescription description = PipeBitLocator.getInstance().createDescription( info );
_builder.addDescription( description, views );
}
return true;
@@ -255,11 +220,13 @@ final public class PiperFileReader {
final String[] component_parameters = splitFromParameters( info );
final String component = component_parameters[ 0 ];
final Object[] parameters = splitParameters( component_parameters[ 1 ] );
- _builder.addLast( getComponentClass( component ), views, parameters );
+ _builder.addLast( PipeBitLocator.getInstance().getComponentClass( component ), views, parameters );
} else {
- _builder.addLast( getComponentClass( info ), views );
+ _builder.addLast( PipeBitLocator.getInstance().getComponentClass( info ), views );
}
return true;
+ case "threads":
+ return setThreadCount( info );
case "collectCuis":
_builder.collectCuis();
return true;
@@ -279,58 +246,16 @@ final public class PiperFileReader {
}
}
- /**
- * @param className fully-specified or simple name of an ae or cc component class
- * @return discovered class for ae or cc
- * @throws ResourceInitializationException if the class could not be found
- */
- private Class<? extends AnalysisComponent> getComponentClass( final String className ) throws
- ResourceInitializationException {
- Class componentClass;
- try {
- componentClass = Class.forName( className );
- } catch ( ClassNotFoundException cnfE ) {
- componentClass = getPackagedComponent( className );
- }
- if ( componentClass == null ) {
- throw new ResourceInitializationException(
- "No Analysis Component found for " + className, EMPTY_OBJECT_ARRAY );
+ private boolean setThreadCount( final String info ) {
+ final Object count = attemptParseInt( info );
+ if ( count instanceof Integer ) {
+ _builder.threads( (Integer) count );
+ return true;
}
- assertClassType( componentClass, AnalysisComponent.class );
- return componentClass;
+ LOGGER.error( "Could not parse thread count from " + info );
+ return false;
}
- /**
- * @param className fully-specified or simple name of an ae or cc component class
- * @return discovered class for ae or cc
- */
- private Class<? extends AnalysisComponent> getPackagedComponent( final String className ) {
- Class componentClass;
- for ( String packageName : _userPackages ) {
- componentClass = getPackagedClass( packageName, className, AnalysisComponent.class );
- if ( componentClass != null ) {
- return componentClass;
- }
- }
- for ( String packageName : CTAKES_PACKAGES ) {
- componentClass = getPackagedClass(
- "org.apache.ctakes." + packageName + ".ae", className, AnalysisComponent.class );
- if ( componentClass != null ) {
- return componentClass;
- }
- componentClass = getPackagedClass(
- "org.apache.ctakes." + packageName + ".cc", className, AnalysisComponent.class );
- if ( componentClass != null ) {
- return componentClass;
- }
- componentClass = getPackagedClass(
- "org.apache.ctakes." + packageName, className, AnalysisComponent.class );
- if ( componentClass != null ) {
- return componentClass;
- }
- }
- return null;
- }
public BufferedReader getPiperReader( final String filePath ) throws FileNotFoundException {
final InputStream stream = getPiperStream( filePath );
@@ -345,29 +270,29 @@ final public class PiperFileReader {
* @return discovered path for the piper file
*/
public InputStream getPiperStream( final String filePath ) {
+ if ( !filePath.toLowerCase().endsWith( ".piper" ) ) {
+ return getPiperStream( filePath + ".piper" );
+ }
final File piperFile = new File( filePath );
String parentPath = null;
if ( piperFile.isAbsolute() ) {
parentPath = piperFile.getParent();
} else {
-// try {
final File located = FileLocator.getFileQuiet( filePath );
if ( located != null ) {
parentPath = located.getParent();
}
-// } catch ( FileNotFoundException fnfE ) {
- // do nothing
-// }
}
- if ( parentPath != null && !parentPath.isEmpty() && !_userPackages.contains( parentPath ) ) {
- _userPackages.add( parentPath );
+ if ( parentPath != null && !parentPath.isEmpty()
+ && !PipeBitLocator.getInstance().getUserPackages().contains( parentPath ) ) {
+ PipeBitLocator.getInstance().addUserPackage( parentPath );
}
InputStream stream = FileLocator.getStreamQuiet( filePath );
if ( stream != null ) {
return stream;
}
// Check user packages
- for ( String packageName : _userPackages ) {
+ for ( String packageName : PipeBitLocator.getInstance().getUserPackages() ) {
stream = FileLocator.getStreamQuiet( packageName.replace( '.', '/' ) + '/' + filePath );
if ( stream != null ) {
return stream;
@@ -378,170 +303,20 @@ final public class PiperFileReader {
}
}
// Check ctakes packages
- for ( String packageName : CTAKES_PACKAGES ) {
- stream = FileLocator.getStreamQuiet( "org/apache/ctakes/" + packageName.replace( '.', '/' ) + '/' + filePath );
+ for ( String packageName : PipeBitLocator.getInstance().getCtakesPackages() ) {
+ stream = FileLocator.getStreamQuiet( packageName.replace( '.', '/' ) + '/' + filePath );
if ( stream != null ) {
return stream;
}
- stream = FileLocator.getStreamQuiet( "org/apache/ctakes/" + packageName.replace( '.', '/' ) + "/pipeline/" + filePath );
+ stream = FileLocator.getStreamQuiet( packageName.replace( '.', '/' ) + "/pipeline/" + filePath );
if ( stream != null ) {
return stream;
}
}
- if ( !filePath.toLowerCase().endsWith( ".piper" ) ) {
- return getPiperStream( filePath + ".piper" );
- }
LOGGER.error( "No piper file found for " + filePath );
return null;
}
-
- /**
- * This requires that the component class has a static createAnnotatorDescription method with no parameters
- * @param className component class for which a descriptor should be created
- * @param values optional parameter values for the descriptor creator
- * @return a description generated for the component
- * @throws ResourceInitializationException if anything went wrong with finding the class or the method,
- * or invoking the method to get an AnalysisEngineDescription
- */
- private AnalysisEngineDescription createDescription( final String className, final Object... values )
- throws ResourceInitializationException {
- final Class<? extends AnalysisComponent> componentClass = getComponentClass( className );
- Method method;
- try {
- if ( values.length == 0 ) {
- method = componentClass.getMethod( "createAnnotatorDescription" );
- } else {
- method = componentClass.getMethod( "createAnnotatorDescription", getValueTypes( values ) );
- }
- } catch ( NoSuchMethodException nsmE ) {
- LOGGER.error( "No createAnnotatorDescription method in " + className );
- throw new ResourceInitializationException( nsmE );
- }
- try {
- final Object invocation = method.invoke( null, values );
- if ( !AnalysisEngineDescription.class.isInstance( invocation ) ) {
- LOGGER.error( "createAnnotatorDescription in " + className + " returned an "
- + invocation.getClass().getName() + " not an AnalysisEngineDescription" );
- throw new ResourceInitializationException();
- }
- return (AnalysisEngineDescription)invocation;
- } catch ( IllegalAccessException | InvocationTargetException multE ) {
- LOGGER.error( "Could not invoke createAnnotatorDescription on " + className );
- throw new ResourceInitializationException( multE );
- }
- }
-
- /**
- * The java reflection getMethod does not handle autoboxing/unboxing.
- * So, we assume that Integer and Boolean parameter values will actually be primitives.
- *
- * @param values parameter value objects
- * @return parameter value class types, unboxing to primitives where needed
- */
- static private Class<?>[] getValueTypes( final Object... values ) {
- final Class<?>[] classArray = new Class[ values.length ];
- for ( int i = 0; i < values.length; i++ ) {
- final Class<?> type = values[ i ].getClass();
- if ( type.equals( Integer.class ) ) {
- classArray[ i ] = int.class;
- } else if ( type.equals( Boolean.class ) ) {
- classArray[ i ] = boolean.class;
- } else {
- classArray[ i ] = type;
- }
- }
- return classArray;
- }
-
- /**
- * @param className fully-specified or simple name of a cr Collection Reader class
- * @return a class for the reader
- * @throws ResourceInitializationException if the class could not be found or instantiated
- */
- private Class<? extends CollectionReader> getReaderClass( final String className )
- throws ResourceInitializationException {
- Class readerClass;
- try {
- readerClass = Class.forName( className );
- } catch ( ClassNotFoundException cnfE ) {
- readerClass = getPackagedReader( className );
- }
- if ( readerClass == null ) {
- throw new ResourceInitializationException( "No Collection Reader found for " + className, EMPTY_OBJECT_ARRAY );
- }
- assertClassType( readerClass, CollectionReader.class );
- return readerClass;
- }
-
- /**
- * @param className simple name of a cr Collection Reader class
- * @return discovered class for a cr
- */
- private Class<? extends CollectionReader> getPackagedReader( final String className ) {
- Class readerClass;
- for ( String packageName : _userPackages ) {
- readerClass = getPackagedClass( packageName, className, CollectionReader.class );
- if ( readerClass != null ) {
- return readerClass;
- }
- }
- for ( String packageName : CTAKES_PACKAGES ) {
- readerClass = getPackagedClass(
- "org.apache.ctakes." + packageName + ".cr", className, CollectionReader.class );
- if ( readerClass != null ) {
- return readerClass;
- }
- readerClass = getPackagedClass(
- "org.apache.ctakes." + packageName, className, CollectionReader.class );
- if ( readerClass != null ) {
- return readerClass;
- }
- }
- return null;
- }
-
- /**
- * @param packageName possible package for class
- * @param className simple name for class
- * @param wantedClassType desired superclass type
- * @return discovered class or null if no proper class was discovered
- */
- static private Class<?> getPackagedClass( final String packageName, final String className,
- final Class<?> wantedClassType ) {
- try {
- Class<?> classType = Class.forName( packageName + "." + className );
- if ( isClassType( classType, wantedClassType ) ) {
- return classType;
- }
- } catch ( ClassNotFoundException cnfE ) {
- // do nothing
- }
- return null;
- }
-
- /**
- * @param classType class type to test
- * @param wantedClassType wanted class type
- * @throws ResourceInitializationException if the class type does not extend the wanted class type
- */
- static private void assertClassType( final Class<?> classType, final Class<?> wantedClassType )
- throws ResourceInitializationException {
- if ( !isClassType( classType, wantedClassType ) ) {
- throw new ResourceInitializationException(
- "Not " + wantedClassType.getSimpleName() + " " + classType.getName(), EMPTY_OBJECT_ARRAY );
- }
- }
-
- /**
- * @param classType class type to test
- * @param wantedClassType wanted class type
- * @return true if the class type extends the wanted class type
- */
- static private boolean isClassType( final Class<?> classType, final Class<?> wantedClassType ) {
- return wantedClassType.isAssignableFrom( classType );
- }
-
/**
*
* @param text -
@@ -559,11 +334,12 @@ final public class PiperFileReader {
final String[] allSplits = SPACE_PATTERN.split( text );
final String[] returnSplits = new String[ 2 ];
returnSplits[ 0 ] = allSplits[ 0 ];
- String parameters = allSplits[ 1 ];
+ final StringBuilder paramBuilder = new StringBuilder();
+ paramBuilder.append( allSplits[ 1 ] );
for ( int i = 2; i < allSplits.length; i++ ) {
- parameters += " " + allSplits[ i ];
+ paramBuilder.append( " " ).append( allSplits[ i ] );
}
- returnSplits[ 1 ] = parameters;
+ returnSplits[ 1 ] = paramBuilder.toString();
return returnSplits;
}
Copied: ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/pipeline/TsCorefSubPipe.piper (from r1815683, ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/pipeline/CorefSubPipe.piper)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/pipeline/TsCorefSubPipe.piper?p2=ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/pipeline/TsCorefSubPipe.piper&p1=ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/pipeline/CorefSubPipe.piper&r1=1815683&r2=1816385&rev=1816385&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/pipeline/CorefSubPipe.piper (original)
+++ ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/pipeline/TsCorefSubPipe.piper Sun Nov 26 15:56:23 2017
@@ -3,8 +3,8 @@
// A Dependency Parser is necessary, but is usually added for assertion so don't add one here
// Constituency Parser adds Terminal Treebank Nodes, needed to create Markables
-add ConstituencyParser
+add concurrent.ThreadSafeConstituencyParser
add DeterministicMarkableAnnotator
-addDescription MarkableSalienceAnnotator /org/apache/ctakes/temporal/ae/salience/model.jar
-addDescription MentionClusterCoreferenceAnnotator /org/apache/ctakes/coreference/models/mention-cluster/model.jar
+addDescription concurrent.ThreadSafeMarkableSalienceAnnotator /org/apache/ctakes/temporal/ae/salience/model.jar
+addDescription concurrent.ThreadSafeMentionClusterCoreferencer /org/apache/ctakes/coreference/models/mention-cluster/model.jar