You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2017/03/01 00:22:33 UTC
svn commit: r1784840 [1/2] - in /ctakes/trunk:
ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/
ctakes-core/ ctakes-core/src/main/java/org/apache/ctakes/core/ae/
ctakes-core/src/main/java/org/apache/ctakes/core/cc/
ctakes-core/src/main/j...
Author: seanfinan
Date: Wed Mar 1 00:22:32 2017
New Revision: 1784840
URL: http://svn.apache.org/viewvc?rev=1784840&view=rev
Log:
PipeBitInfo for core
CollectionMaps moved to core
Semantic check beyond tuis for dictionary
Added:
ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/collection/
- copied from r1784837, ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/
Removed:
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/
Modified:
ctakes/trunk/ctakes-core/pom.xml
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CopyAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/FilterAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/NullAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/OverlapAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphSentenceFixer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SectionSegmentAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentWithTagsAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CasConsumer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/FilesInDirectoryCasConsumer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/HtmlTableCasConsumer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/NormalizedFilesInDirectoryCasConsumer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenFreqCasConsumer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenOffsetsCasConsumer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionCyclicalReads.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JdbcCollectionReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/LinesFromFileCollectionReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/LuceneCollectionReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XmiCollectionReaderCtakes.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/collection/ArrayListMap.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/collection/CollectionCreator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/collection/CollectionCreatorFactory.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/collection/CollectionMap.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/collection/DefaultCollectionMap.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/collection/EnumSetMap.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/collection/HashSetMap.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/collection/ImmutableCollectionMap.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/AbstractJCasTermAnnotator.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/DefaultJCasTermAnnotator.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/JCasTermAnnotator.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/OverlapJCasTermAnnotator.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/ae/WindowProcessor.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/BsvConceptFactory.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/DefaultConcept.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/JdbcConceptFactory.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/DefaultTermConsumer.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/TermConsumer.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/MemRareWordDictionary.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/RareWordTermMapCreator.java
Added: ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv?rev=1784840&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv (added)
+++ ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv Wed Mar 1 00:22:32 2017
@@ -0,0 +1,5 @@
+Numbered List||(?:^[ ]*[\d]{1,2}(?::|\.)[\t ]+(?:(?!^[ ]*[\d]{1,2}(?::|\.)[\t ]+)(?:[^\t\r\n]+\r?\n))+){2,}||(?:^[ ]*[\d]{1,2}(?::|\.)[\t ]+(?:(?!^[ ]*[\d]{1,2}(?::|\.)[\t ]+)(?:[^\t\r\n]+\r?\n))+)
+Alpha Sentence List||(?:^[ ]*[A-Z](?::|\.)+\)?[\t ]+(?:[^\t\n\.]+(?:\.|\n))+\r?\n){2,}||(?:^[ ]*[A-Z](?::|\.)+\)?[\t ]+(?:[^\t\n\.]+(?:\.|\n))+\r?\n)
+// Name Value List||(?:^[ ]*[^\t\r\n ]+:[\t ]+(?:[^\t\r\n:]+\r?\n)+){3,}||(?:^[ ]*[^\t\r\n ]+:[\t ]+(?:[^\t\r\n:]+\r?\n)+)
+Name Value List||(?:^[^\t\r\n]{2,50}:[\t ]+(?:[^\t\r\n:]+\r?\n)+){3,}||(?:^[^\t\r\n]{2,50}:[\t ]+(?:[^\t\r\n:]+\r?\n)+)
+Multi Column List||(?:^(?:[^\t\r\n :]+(?: [^\t\r\n :]+)*(?:\t+| {3,}))+(?:[^\t\r\n ]+(?: [^\t\r\n ]+)*)[\t ]*\r?\n){3,}||\r?\n
Modified: ctakes/trunk/ctakes-core/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/pom.xml?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/pom.xml (original)
+++ ctakes/trunk/ctakes-core/pom.xml Wed Mar 1 00:22:32 2017
@@ -55,6 +55,7 @@
<groupId>jdom</groupId>
<artifactId>jdom</artifactId>
</dependency>
+ <!-- Todo - get rid of jdom v1 -->
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom2</artifactId>
@@ -79,6 +80,7 @@
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
</dependency>
+ <!-- Todo : is lucene necessary at this level? -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
@@ -91,6 +93,7 @@
<groupId>edu.mit.findstruct</groupId>
<artifactId>findstructapi</artifactId>
</dependency>
+ <!-- Todo : is anything actually using this sqlwrapper? -->
<dependency>
<groupId>com.googlecode.armbrust-file-utils</groupId>
<artifactId>sqlwrapper</artifactId>
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/BsvRegexSectionizer.java Wed Mar 1 00:22:32 2017
@@ -1,6 +1,7 @@
package org.apache.ctakes.core.ae;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -18,6 +19,11 @@ import java.io.InputStreamReader;
* @version %I%
* @since 10/5/2016
*/
+@PipeBitInfo(
+ name = "Regex Sectionizer (BSV)",
+ description = "Annotates Document Sections by detecting Section Headers using Regular Expressions provided in a Bar-Separated-Value (BSV) File.",
+ output = "Segment Annotations."
+)
public class BsvRegexSectionizer extends RegexSectionizer {
static private final Logger LOGGER = Logger.getLogger( "BsvRegexSectionizer" );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java Wed Mar 1 00:22:32 2017
@@ -19,36 +19,35 @@
package org.apache.ctakes.core.ae;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* Creates segment annotations based on the ccda_sections.txt file, which is
* based on HL7/CCDA/LOINC standard headings. Additional custom heading names can
* be added to the file.
*/
+@PipeBitInfo(
+ name = "Segment Annotator (CDA)",
+ description = "Annotates Document Sections by detecting Section Headers using Regular Expressions provided in a File.",
+ output = "Segment Annotations."
+)
public class CDASegmentAnnotator extends JCasAnnotator_ImplBase {
Logger logger = Logger.getLogger(this.getClass());
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CopyAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CopyAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CopyAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/CopyAnnotator.java Wed Mar 1 00:22:32 2017
@@ -18,23 +18,24 @@
*/
package org.apache.ctakes.core.ae;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.ParamUtil;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.TOP;
-import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
/**
@@ -44,6 +45,12 @@ import org.apache.uima.fit.util.JCasUtil
* @author Mayo Clinic
*
*/
+@PipeBitInfo(
+ name = "JCas Copy Annotator",
+ description = "Copies document text and all annotations into a new JCas.",
+ input = "JCas.",
+ output = "JCas copy."
+)
public class CopyAnnotator extends JCasAnnotator_ImplBase {
public static final String PARAM_SOURCE_CLASS = "srcObjClass";
@ConfigurationParameter(
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java Wed Mar 1 00:22:32 2017
@@ -19,13 +19,19 @@
package org.apache.ctakes.core.ae;
-import org.apache.log4j.Logger;
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.jcas.JCas;
+@PipeBitInfo(
+ name = "Document ID Printer",
+ description = "Logs the Document ID to Log4j and Standard Output.",
+ role = PipeBitInfo.Role.SPECIAL,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class DocumentIdPrinterAnalysisEngine extends JCasAnnotator_ImplBase
{
protected final Logger logger = Logger.getLogger(DocumentIdPrinterAnalysisEngine.class.getName());
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/FilterAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/FilterAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/FilterAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/FilterAnnotator.java Wed Mar 1 00:22:32 2017
@@ -24,6 +24,10 @@ package org.apache.ctakes.core.ae;
*
* @author m039575
*/
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -33,9 +37,12 @@ import org.apache.uima.jcas.JFSIndexRepo
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-
+@PipeBitInfo(
+ name = "Annotation Remover",
+ description = "Removes annotations of a given type from the JCas.",
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = "JCas without annotations of the given type."
+)
public class FilterAnnotator extends JCasAnnotator_ImplBase {
// TODO parameterize retainAttrTypeId = DISORDER_ANNOTATIONS = 2
private static int DISORDER_ANNOTATIONS = 2;
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.ae;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.core.util.regex.RegexSpanFinder;
@@ -32,6 +33,11 @@ import java.util.stream.Collectors;
* @version %I%
* @since 9/26/2016
*/
+@PipeBitInfo(
+ name = "List Annotator",
+ description = "Annotates formatted List Sections by detecting them using Regular Expressions provided in an input File.",
+ output = "List and ListEntry annotations."
+)
final public class ListAnnotator extends JCasAnnotator_ImplBase {
static private final Logger LOGGER = Logger.getLogger( "ListAnnotator" );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.ae;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.typesystem.type.textspan.List;
import org.apache.ctakes.typesystem.type.textspan.ListEntry;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -18,6 +19,12 @@ import java.util.regex.Pattern;
* @version %I%
* @since 9/28/2016
*/
+@PipeBitInfo(
+ name = "List Sentence Fixer",
+ description = "Re-annotates Sentences based upon existing List Entries, preventing a Sentence from spanning more than one List Entry.",
+ input = "Sentence and List Entry annotations.",
+ output = "Adjusted Sentences."
+)
final public class ListSentenceFixer extends JCasAnnotator_ImplBase {
static private final Logger LOGGER = Logger.getLogger( "ListSentenceFixer" );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/NullAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/NullAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/NullAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/NullAnnotator.java Wed Mar 1 00:22:32 2017
@@ -18,9 +18,10 @@
*/
package org.apache.ctakes.core.ae;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
/**
* This annotator does nothing. The process method is overridden but is empty.
@@ -31,7 +32,12 @@ import org.apache.uima.jcas.JCas;
* @author Mayo Clinic
*
*/
-public class NullAnnotator extends JCasAnnotator_ImplBase
+@PipeBitInfo(
+ name = "Null Annotator",
+ description = "Does absolutely nothing.",
+ output = PipeBitInfo.NO_OUTPUT
+)
+public class NullAnnotator extends JCasAnnotator_ImplBase
{
public void process(JCas jcas)
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/OverlapAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/OverlapAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/OverlapAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/OverlapAnnotator.java Wed Mar 1 00:22:32 2017
@@ -18,12 +18,7 @@
*/
package org.apache.ctakes.core.ae;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.StringTokenizer;
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.JCasUtil;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
@@ -35,6 +30,8 @@ import org.apache.uima.jcas.JFSIndexRepo
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
+import java.util.*;
+
/*
* When a given type of overlap is found between two annotations
@@ -56,6 +53,12 @@ import org.apache.uima.resource.Resource
* For example, a sentence should not end in the middle of a decimal number.
*
*/
+@PipeBitInfo(
+ name = "Overlap Annotator",
+ description = "Removes or modifies annotations that overlap.",
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = "Removed or modified annotations."
+)
public class OverlapAnnotator extends JCasAnnotator_ImplBase {
// LOG4J logger based on class name
private Logger iv_logger = Logger.getLogger(getClass().getName());
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphAnnotator.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.ae;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.core.util.regex.RegexSpanFinder;
@@ -25,6 +26,11 @@ import java.util.regex.Pattern;
* @version %I%
* @since 9/23/2016
*/
+@PipeBitInfo(
+ name = "Paragraph Annotator",
+ description = "Annotates Paragraphs by detecting them using Regular Expressions provided in an input File or by empty text lines.",
+ output = "Paragraph annotations."
+)
final public class ParagraphAnnotator extends JCasAnnotator_ImplBase {
static private final Logger LOGGER = Logger.getLogger( "ParagraphAnnotator" );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphSentenceFixer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphSentenceFixer.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphSentenceFixer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphSentenceFixer.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.ae;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.typesystem.type.textspan.Paragraph;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -23,6 +24,12 @@ import java.util.stream.Collectors;
* @version %I%
* @since 10/6/2016
*/
+@PipeBitInfo(
+ name = "Paragraph Sentence Fixer",
+ description = "Re-annotates Sentences based upon existing Paragraphs, preventing a Sentence from spanning more than one Paragraph.",
+ input = "Sentence and Paragraph annotations.",
+ output = "Adjusted Sentences."
+)
final public class ParagraphSentenceFixer extends JCasAnnotator_ImplBase {
static private final Logger LOGGER = Logger.getLogger( "ParagraphSentenceFixer" );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java Wed Mar 1 00:22:32 2017
@@ -18,94 +18,20 @@
*/
package org.apache.ctakes.core.ae;
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
+import com.google.common.base.Charsets;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import com.google.common.io.Files;
import org.apache.ctakes.core.knowtator.KnowtatorAnnotation;
import org.apache.ctakes.core.knowtator.KnowtatorXMLParser;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.SHARPKnowtatorXMLDefaults;
import org.apache.ctakes.typesystem.type.constants.CONST;
-import org.apache.ctakes.typesystem.type.refsem.BodyLaterality;
-import org.apache.ctakes.typesystem.type.refsem.BodySide;
-import org.apache.ctakes.typesystem.type.refsem.Course;
+import org.apache.ctakes.typesystem.type.refsem.*;
import org.apache.ctakes.typesystem.type.refsem.Date;
-import org.apache.ctakes.typesystem.type.refsem.Event;
-import org.apache.ctakes.typesystem.type.refsem.EventProperties;
-import org.apache.ctakes.typesystem.type.refsem.LabDeltaFlag;
-import org.apache.ctakes.typesystem.type.refsem.LabReferenceRange;
-import org.apache.ctakes.typesystem.type.refsem.LabValue;
-import org.apache.ctakes.typesystem.type.refsem.MedicationDosage;
-import org.apache.ctakes.typesystem.type.refsem.MedicationDuration;
-import org.apache.ctakes.typesystem.type.refsem.MedicationForm;
-import org.apache.ctakes.typesystem.type.refsem.MedicationFrequency;
-import org.apache.ctakes.typesystem.type.refsem.MedicationRoute;
-import org.apache.ctakes.typesystem.type.refsem.MedicationStatusChange;
-import org.apache.ctakes.typesystem.type.refsem.MedicationStrength;
-import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
-import org.apache.ctakes.typesystem.type.refsem.ProcedureDevice;
-import org.apache.ctakes.typesystem.type.refsem.ProcedureMethod;
-import org.apache.ctakes.typesystem.type.refsem.Severity;
-import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
-import org.apache.ctakes.typesystem.type.relation.AffectsTextRelation;
-import org.apache.ctakes.typesystem.type.relation.AspectualTextRelation;
-import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
-import org.apache.ctakes.typesystem.type.relation.CausesBringsAboutTextRelation;
-import org.apache.ctakes.typesystem.type.relation.ComplicatesDisruptsTextRelation;
-import org.apache.ctakes.typesystem.type.relation.DegreeOfTextRelation;
-import org.apache.ctakes.typesystem.type.relation.IndicatesTextRelation;
-import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
-import org.apache.ctakes.typesystem.type.relation.ManagesTreatsTextRelation;
-import org.apache.ctakes.typesystem.type.relation.ManifestationOfTextRelation;
-import org.apache.ctakes.typesystem.type.relation.PreventsTextRelation;
-import org.apache.ctakes.typesystem.type.relation.RelationArgument;
-import org.apache.ctakes.typesystem.type.relation.ResultOfTextRelation;
-import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.relation.*;
import org.apache.ctakes.typesystem.type.structured.DocumentID;
-import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
-import org.apache.ctakes.typesystem.type.textsem.BodyLateralityModifier;
-import org.apache.ctakes.typesystem.type.textsem.BodySideModifier;
-import org.apache.ctakes.typesystem.type.textsem.ConditionalModifier;
-import org.apache.ctakes.typesystem.type.textsem.CourseModifier;
-import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.EventMention;
-import org.apache.ctakes.typesystem.type.textsem.GenericModifier;
-import org.apache.ctakes.typesystem.type.textsem.HistoryOfModifier;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.LabDeltaFlagModifier;
-import org.apache.ctakes.typesystem.type.textsem.LabEstimatedModifier;
-import org.apache.ctakes.typesystem.type.textsem.LabInterpretationModifier;
-import org.apache.ctakes.typesystem.type.textsem.LabMention;
-import org.apache.ctakes.typesystem.type.textsem.LabReferenceRangeModifier;
-import org.apache.ctakes.typesystem.type.textsem.LabValueModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationAllergyModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationDosageModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationDurationModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationFormModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationFrequencyModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationMention;
-import org.apache.ctakes.typesystem.type.textsem.MedicationRouteModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationStatusChangeModifier;
-import org.apache.ctakes.typesystem.type.textsem.MedicationStrengthModifier;
-import org.apache.ctakes.typesystem.type.textsem.Modifier;
-import org.apache.ctakes.typesystem.type.textsem.PolarityModifier;
-import org.apache.ctakes.typesystem.type.textsem.ProcedureDeviceModifier;
-import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
-import org.apache.ctakes.typesystem.type.textsem.ProcedureMethodModifier;
-import org.apache.ctakes.typesystem.type.textsem.SeverityModifier;
-import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
-import org.apache.ctakes.typesystem.type.textsem.SubjectModifier;
-import org.apache.ctakes.typesystem.type.textsem.TimeMention;
-import org.apache.ctakes.typesystem.type.textsem.UncertaintyModifier;
+import org.apache.ctakes.typesystem.type.textsem.*;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -123,11 +49,18 @@ import org.apache.uima.jcas.tcas.Annotat
import org.apache.uima.util.UriUtils;
import org.jdom2.JDOMException;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import com.google.common.io.Files;
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.*;
+@PipeBitInfo(
+ name = "Knowtator XML Reader",
+ description = "Reads annotations from Knowtator XML files in a directory.",
+ role = PipeBitInfo.Role.SPECIAL,
+ output = PipeBitInfo.POPULATED_JCAS
+)
public class SHARPKnowtatorXMLReader extends JCasAnnotator_ImplBase {
static Logger LOGGER = Logger.getLogger(SHARPKnowtatorXMLReader.class);
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SectionSegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SectionSegmentAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SectionSegmentAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SectionSegmentAnnotator.java Wed Mar 1 00:22:32 2017
@@ -18,11 +18,13 @@
*/
package org.apache.ctakes.core.ae;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.util.ArrayList;
-import java.util.HashMap;
-
+import findstruct.Section;
+import findstruct.StructFinder;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.resource.FileResource;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.DocumentSection;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
@@ -30,13 +32,10 @@ import org.apache.uima.analysis_engine.A
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
-
-import org.apache.ctakes.core.resource.FileResource;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.core.util.DocumentSection;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
-import findstruct.Section;
-import findstruct.StructFinder;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.HashMap;
/**
* Creates a single segment annotation that spans the entire document. This is
@@ -45,6 +44,11 @@ import findstruct.StructFinder;
*
* @author Mayo Clinic
*/
+@PipeBitInfo(
+ name = "Section Annotator",
+ description = "Annotates Document Sections by detecting Section Headers in template.",
+ output = "Segment Annotations."
+)
public class SectionSegmentAnnotator extends JCasAnnotator_ImplBase {
private String segmentId;
private StructFinder structureFinder;
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java Wed Mar 1 00:22:32 2017
@@ -23,6 +23,7 @@ import opennlp.tools.sentdetect.*;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.sentence.EndOfSentenceScannerImpl;
import org.apache.ctakes.core.sentence.SentenceDetectorCtakes;
@@ -50,6 +51,11 @@ import java.util.*;
*
* @author Mayo Clinic
*/
+@PipeBitInfo(
+ name = "Sentence Detector",
+ description = "Annotates Sentences based upon an OpenNLP model.",
+ output = "Sentence annotations."
+)
public class SentenceDetector extends JCasAnnotator_ImplBase {
/**
* Value is "SegmentsToSkip". This parameter specifies which sections to
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentAnnotator.java Wed Mar 1 00:22:32 2017
@@ -18,15 +18,16 @@
*/
package org.apache.ctakes.core.ae;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
/**
* Creates a single segment annotation that spans the entire document. This is
@@ -35,6 +36,11 @@ import org.apache.uima.fit.factory.Analy
*
* @author Mayo Clinic
*/
+@PipeBitInfo(
+ name = "Single Segment Annotator",
+ description = "Annotates Document as a single Section.",
+ output = "Segment annotation."
+)
public class SimpleSegmentAnnotator extends JCasAnnotator_ImplBase {
public static final String PARAM_SEGMENT_ID = "SegmentID";
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentWithTagsAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentWithTagsAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentWithTagsAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SimpleSegmentWithTagsAnnotator.java Wed Mar 1 00:22:32 2017
@@ -18,9 +18,9 @@
*/
package org.apache.ctakes.core.ae;
-import java.io.BufferedReader;
-import java.io.StringReader;
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
@@ -28,8 +28,8 @@ import org.apache.uima.analysis_engine.A
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
+import java.io.BufferedReader;
+import java.io.StringReader;
/**
* Creates a single segment annotation that spans the entire document. This is
@@ -38,6 +38,11 @@ import org.apache.ctakes.typesystem.type
*
* @author Mayo Clinic
*/
+@PipeBitInfo(
+ name = "Segment Annotator (Tag)",
+ description = "Annotates Document Sections by detecting start and end Section Tags.",
+ output = "Segment Annotations."
+)
public class SimpleSegmentWithTagsAnnotator extends JCasAnnotator_ImplBase {
private String segmentId;
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.ae;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.AnalysisComponent;
@@ -20,13 +21,18 @@ import org.apache.uima.resource.Resource
* @version %I%
* @since 8/8/2016
*/
+@PipeBitInfo(
+ name = "Start or Finish Logger",
+ description = "Simple Annotator to place before and after other annotators that do not Log their Start and Finish.",
+ role = PipeBitInfo.Role.SPECIAL,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class StartFinishLogger extends JCasAnnotator_ImplBase {
public static final String PARAM_LOGGER_NAME = "LOGGER_NAME";
@ConfigurationParameter(
name = PARAM_LOGGER_NAME,
- mandatory = true,
description = "provides the full name of the Annotator Engine for which start / end logging should be done.",
defaultValue = { "StartEndProgressLogger" }
)
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotator.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotator.java Wed Mar 1 00:22:32 2017
@@ -18,11 +18,13 @@
*/
package org.apache.ctakes.core.ae;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
+import org.apache.ctakes.core.nlp.tokenizer.Token;
+import org.apache.ctakes.core.nlp.tokenizer.Tokenizer;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.resource.StringIntegerMapResource;
+import org.apache.ctakes.core.util.ParamUtil;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
@@ -34,19 +36,21 @@ import org.apache.uima.jcas.tcas.Annotat
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
-
-import org.apache.ctakes.core.nlp.tokenizer.Token;
-import org.apache.ctakes.core.nlp.tokenizer.Tokenizer;
-import org.apache.ctakes.core.resource.StringIntegerMapResource;
-import org.apache.ctakes.core.util.ParamUtil;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
/**
* UIMA annotator that uses the Tokenizer module.
*
* @author Mayo Clinic
*/
+@PipeBitInfo(
+ name = "Tokenizer Annotator",
+ description = "Annotates Document Tokens.",
+ output = "Token annotations."
+)
public class TokenizerAnnotator extends JCasAnnotator_ImplBase {
// LOG4J logger based on class name
private Logger logger = Logger.getLogger(getClass().getName());
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java Wed Mar 1 00:22:32 2017
@@ -1,100 +1,102 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.core.ae;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
-import org.apache.log4j.Logger;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.util.JCasUtil;
-
-/**
- * UIMA annotator that tokenizes based on Penn Treebank rules.
- *
- * @author Mayo Clinic
- */
-public class TokenizerAnnotatorPTB extends JCasAnnotator_ImplBase
-{
- // LOG4J logger based on class name
- private Logger logger = Logger.getLogger(getClass().getName());
-
- /**
- * Value is "SegmentsToSkip". This parameter specifies which segments to skip. The parameter should be
- * of type String, should be multi-valued and optional.
- */
- public static final String PARAM_SEGMENTS_TO_SKIP = "SegmentsToSkip";
- @ConfigurationParameter(
- name = PARAM_SEGMENTS_TO_SKIP,
- mandatory = false,
- description = "Set of segments that can be skipped"
- )
- private String[] skipSegmentsArray;
- private Set<String> skipSegmentsSet;
-
- private TokenizerPTB tokenizer;
-
- private int tokenCount = 0;
-
- @Override
- public void initialize(UimaContext aContext) throws ResourceInitializationException {
- super.initialize(aContext);
- logger.info("Initializing " + this.getClass().getName());
- tokenizer = new TokenizerPTB();
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.core.ae;
+
+import org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.util.*;
+
+/**
+ * UIMA annotator that tokenizes based on Penn Treebank rules.
+ *
+ * @author Mayo Clinic
+ */
+@PipeBitInfo(
+ name = "Tokenizer Annotator (PTB)",
+ description = "Annotates Document Penn TreeBank Tokens.",
+ output = "Token annotations."
+)
+public class TokenizerAnnotatorPTB extends JCasAnnotator_ImplBase
+{
+ // LOG4J logger based on class name
+ private Logger logger = Logger.getLogger(getClass().getName());
+
+ /**
+ * Value is "SegmentsToSkip". This parameter specifies which segments to skip. The parameter should be
+ * of type String, should be multi-valued and optional.
+ */
+ public static final String PARAM_SEGMENTS_TO_SKIP = "SegmentsToSkip";
+ @ConfigurationParameter(
+ name = PARAM_SEGMENTS_TO_SKIP,
+ mandatory = false,
+ description = "Set of segments that can be skipped"
+ )
+ private String[] skipSegmentsArray;
+ private Set<String> skipSegmentsSet;
+
+ private TokenizerPTB tokenizer;
+
+ private int tokenCount = 0;
+
+ @Override
+ public void initialize(UimaContext aContext) throws ResourceInitializationException {
+ super.initialize(aContext);
+ logger.info("Initializing " + this.getClass().getName());
+ tokenizer = new TokenizerPTB();
skipSegmentsSet = new HashSet<>();
- if(skipSegmentsArray != null){
- Collections.addAll(skipSegmentsSet, skipSegmentsArray);
- }
- }
-
- /**
- * Entry point for processing.
- */
- @Override
- public void process(JCas jcas) throws AnalysisEngineProcessException {
-
- logger.info("process(JCas) in " + this.getClass().getName());
-
- tokenCount = 0;
-
- Collection<Segment> segments = JCasUtil.select(jcas, Segment.class);
- for(Segment sa : segments){
- String segmentID = sa.getId();
+ if(skipSegmentsArray != null){
+ Collections.addAll(skipSegmentsSet, skipSegmentsArray);
+ }
+ }
+
+ /**
+ * Entry point for processing.
+ */
+ @Override
+ public void process(JCas jcas) throws AnalysisEngineProcessException {
+
+ logger.info("process(JCas) in " + this.getClass().getName());
+
+ tokenCount = 0;
+
+ Collection<Segment> segments = JCasUtil.select(jcas, Segment.class);
+ for(Segment sa : segments){
+ String segmentID = sa.getId();
if (!skipSegmentsSet.contains(segmentID)) {
- annotateRange(jcas, sa.getBegin(), sa.getEnd());
- }
- }
+ annotateRange(jcas, sa.getBegin(), sa.getEnd());
+ }
+ }
}
@@ -105,13 +107,13 @@ public class TokenizerAnnotatorPTB exten
* Tokenizes a range of text, adding the tokens to the CAS
* Tokenizes one sentence at a time. Only tokenizes what is within Sentence annotation.
* There must have been Sentence annotations created beforehand in order for this method
- * to tokenize anything.
+ * to tokenize anything.
* @throws AnalysisEngineProcessException
*/
protected void annotateRange(JCas jcas, int rangeBegin, int rangeEnd) throws AnalysisEngineProcessException {
// int tokenCount = 0; // can't start with tokenCount=0 here because this method can be called multiple times
-
+
// First look for all newlines and carriage returns (which are not contained within sentences)
String docText = jcas.getDocumentText();
for (int i = rangeBegin; i<rangeEnd; i++) {
@@ -138,9 +140,9 @@ public class TokenizerAnnotatorPTB exten
}
// Now process each sentence
- Collection<Sentence> sentences = JCasUtil.select(jcas, Sentence.class);
+ Collection<Sentence> sentences = JCasUtil.select(jcas, Sentence.class);
- // Tokenize each sentence, adding the tokens to the cas index
+ // Tokenize each sentence, adding the tokens to the cas index
for(Sentence sentence : sentences){
if (sentence.getBegin() < rangeBegin || sentence.getEnd() > rangeEnd) {
continue;
@@ -152,11 +154,11 @@ public class TokenizerAnnotatorPTB exten
e.printStackTrace();
} else{
//logger.info("Token #" + tokenCount + " len = " + bta.getCoveredText().length() + " " + bta.getCoveredText());
- // add the BaseToken to CAS index
+ // add the BaseToken to CAS index
if(BaseToken.class.isAssignableFrom(bta.getClass())){
- BaseToken.class.cast(bta).addToIndexes();
- }else{
- throw new AnalysisEngineProcessException("Token returned cannot be cast as BaseToken", new Object[]{bta});
+ BaseToken.class.cast(bta).addToIndexes();
+ }else{
+ throw new AnalysisEngineProcessException("Token returned cannot be cast as BaseToken", new Object[]{bta});
}
//tokenCount++;
}
@@ -165,7 +167,7 @@ public class TokenizerAnnotatorPTB exten
}
// Now add the tokenNumber in the order of offsets
- Collection<BaseToken> tokens = JCasUtil.select(jcas, BaseToken.class);
+ Collection<BaseToken> tokens = JCasUtil.select(jcas, BaseToken.class);
for(BaseToken bta : tokens){
if (bta.getBegin()>=rangeBegin && bta.getBegin()<rangeEnd) {
bta.setTokenNumber(tokenCount);
@@ -173,10 +175,10 @@ public class TokenizerAnnotatorPTB exten
}
}
- }
-
- public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException{
- return AnalysisEngineFactory.createEngineDescription(TokenizerAnnotatorPTB.class);
+ }
+
+ public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException{
+ return AnalysisEngineFactory.createEngineDescription(TokenizerAnnotatorPTB.class);
}
}
@@ -308,4 +310,4 @@ public class TokenizerAnnotatorPTB exten
// uni-
// vice-
// -wise
-
+
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CasConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CasConsumer.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CasConsumer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CasConsumer.java Wed Mar 1 00:22:32 2017
@@ -18,6 +18,7 @@
*/
package org.apache.ctakes.core.cc;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.log4j.Logger;
import org.apache.uima.cas.CAS;
@@ -37,6 +38,13 @@ import java.io.OutputStream;
*
* @author Mayo Clinic
*/
+@PipeBitInfo(
+ name = "XMI Writer (CasConsumer)",
+ description = "Writes XMI files with full representation of input text and all extracted information.",
+ role = PipeBitInfo.Role.WRITER,
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class CasConsumer extends CasConsumer_ImplBase {
// LOG4J logger based on class name
private Logger iv_logger = Logger.getLogger( getClass().getName() );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.cc;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.core.util.OntologyConceptUtil;
import org.apache.ctakes.typesystem.type.constants.CONST;
@@ -32,6 +33,14 @@ import static org.apache.ctakes.core.con
* @version %I%
* @since 7/29/2016
*/
+@PipeBitInfo(
+ name = "CUI Count Writer",
+ description = "Writes a two-column BSV file containing CUIs and their total counts in a document.",
+ role = PipeBitInfo.Role.WRITER,
+ input = "CUI " + PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
+// TODO extend AbstractOutputFileWriter
public class CuiCountFileWriter extends CasConsumer_ImplBase {
static private final Logger LOGGER = Logger.getLogger( "CuiCountFileWriter" );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/FilesInDirectoryCasConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/FilesInDirectoryCasConsumer.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/FilesInDirectoryCasConsumer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/FilesInDirectoryCasConsumer.java Wed Mar 1 00:22:32 2017
@@ -18,6 +18,7 @@
*/
package org.apache.ctakes.core.cc;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.log4j.Logger;
import org.apache.uima.cas.CAS;
@@ -41,6 +42,13 @@ import java.io.*;
* FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
*/
+@PipeBitInfo(
+ name = "Document Text Writer",
+ description = "Writes Text files with original text from the document.",
+ role = PipeBitInfo.Role.WRITER,
+ input = "Document Text.",
+ output = PipeBitInfo.NO_OUTPUT
+)
public class FilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
public static final String PARAM_OUTPUTDIR = "OutputDirectory";
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/HtmlTableCasConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/HtmlTableCasConsumer.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/HtmlTableCasConsumer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/HtmlTableCasConsumer.java Wed Mar 1 00:22:32 2017
@@ -18,21 +18,7 @@
*/
package org.apache.ctakes.core.cc;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.StringTokenizer;
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.JCasUtil;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CasConsumer_ImplBase;
@@ -41,6 +27,13 @@ import org.apache.uima.jcas.tcas.Annotat
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.*;
+
/**
* Outputs an HTML table that visualizes the specified Annotation objects over
@@ -49,6 +42,13 @@ import org.apache.uima.resource.Resource
* @author Mayo Clinic
*
*/
+@PipeBitInfo(
+ name = "HTML Table Writer",
+ description = "Writes HTML files with a Table representation of extracted information.",
+ role = PipeBitInfo.Role.WRITER,
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class HtmlTableCasConsumer extends CasConsumer_ImplBase
{
private File iv_outputDir;
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java Wed Mar 1 00:22:32 2017
@@ -18,6 +18,7 @@
*/
package org.apache.ctakes.core.cc;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.OntologyConceptUtil;
import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -39,6 +40,13 @@ import java.util.*;
* @version %I%
* @since 1/8/2015
*/
+@PipeBitInfo(
+ name = "JDBC Writer (Template)",
+ description = "Stores extracted information and document metadata in a database.",
+ role = PipeBitInfo.Role.WRITER,
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class JdbcWriterTemplate extends AbstractJdbcWriter {
static private final Logger LOGGER = Logger.getLogger( "JdbcWriterTemplate" );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/NormalizedFilesInDirectoryCasConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/NormalizedFilesInDirectoryCasConsumer.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/NormalizedFilesInDirectoryCasConsumer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/NormalizedFilesInDirectoryCasConsumer.java Wed Mar 1 00:22:32 2017
@@ -18,6 +18,7 @@
*/
package org.apache.ctakes.core.cc;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.core.util.WordTokenUtil;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
@@ -44,6 +45,13 @@ import java.io.*;
* FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
*/
+@PipeBitInfo(
+ name = "Document Text Writer (Dir)",
+ description = "Writes Text files with original text from the document in a specified directory.",
+ role = PipeBitInfo.Role.WRITER,
+ input = "Document Text.",
+ output = PipeBitInfo.NO_OUTPUT
+)
public class NormalizedFilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
public static final String PARAM_OUTPUTDIR = "OutputDirectory";
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.cc;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.core.util.JCasUtil;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
@@ -32,6 +33,13 @@ import static org.apache.ctakes.core.con
* @version %I%
* @since 9/22/2014
*/
+@PipeBitInfo(
+ name = "Sentences Writer",
+ description = "Writes Text files with original text from the document, sentence by sentence.",
+ role = PipeBitInfo.Role.WRITER,
+ input = "Sentence " + PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class SentenceTokensPrinter extends CasConsumer_ImplBase {
// LOG4J logger based on interface name
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenFreqCasConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenFreqCasConsumer.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenFreqCasConsumer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenFreqCasConsumer.java Wed Mar 1 00:22:32 2017
@@ -18,26 +18,21 @@
*/
package org.apache.ctakes.core.cc;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.TreeSet;
-
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.apache.uima.util.ProcessTrace;
-import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.*;
/**
* This class creates a file that contains the frequencies of the word tokens found in a set
@@ -45,6 +40,13 @@ import org.apache.ctakes.typesystem.type
* file for any kind of annotation but only counts Token annotations at the moment.
*/
+@PipeBitInfo(
+ name = "Word Count Writer",
+ description = "Writes a two-column BSV file containing Words and their total counts in a document.",
+ role = PipeBitInfo.Role.WRITER,
+ input = "WordToken " + PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class TokenFreqCasConsumer extends CasConsumer_ImplBase
{
/**
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenOffsetsCasConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenOffsetsCasConsumer.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenOffsetsCasConsumer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenOffsetsCasConsumer.java Wed Mar 1 00:22:32 2017
@@ -18,24 +18,20 @@
*/
package org.apache.ctakes.core.cc;
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
/**
* For each CAS a local file with the offsets of the BaseToken annotations is written to a directory specified by a parameter.
@@ -55,6 +51,13 @@ import org.apache.ctakes.typesystem.type
* from Knowtator annotations.
*/
+@PipeBitInfo(
+ name = "Token Offset Writer",
+ description = "Writes a two-column BSV file containing Begin and End offsets of tokens in a document.",
+ role = PipeBitInfo.Role.WRITER,
+ input = "BaseToken " + PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class TokenOffsetsCasConsumer extends CasConsumer_ImplBase {
public static final String PARAM_OUTPUTDIR = "OutputDirectory";
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java Wed Mar 1 00:22:32 2017
@@ -37,6 +37,7 @@ package org.apache.ctakes.core.cc;
* under the License.
*/
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.uima.UIMAFramework;
import org.apache.uima.UimaContext;
@@ -73,6 +74,13 @@ import java.net.URL;
* <li><code>OutputDirectory</code> - path to directory into which output files will be written</li>
* </ul>
*/
+@PipeBitInfo(
+ name = "XMI Writer (CasConsumerCtakes)",
+ description = "Writes XMI files with full representation of input text and all extracted information.",
+ role = PipeBitInfo.Role.WRITER,
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class XmiWriterCasConsumerCtakes extends CasConsumer_ImplBase {
/**
* Name of configuration parameter that must be set to the path of a directory into which the
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java Wed Mar 1 00:22:32 2017
@@ -85,16 +85,29 @@ final public class PrettyTextWriter {
} else {
outputFile = new File( _outputDirPath, docId + FILE_EXTENSION );
}
- try ( final BufferedWriter writer = new BufferedWriter( new FileWriter( outputFile ) ) ) {
- final Collection<Sentence> sentences = JCasUtil.select( jcas, Sentence.class );
+ writeFile( jcas, outputFile.getPath() );
+// try ( final BufferedWriter writer = new BufferedWriter( new FileWriter( outputFile ) ) ) {
+// final Collection<Sentence> sentences = JCasUtil.select( jcas, Sentence.class );
+// for ( Sentence sentence : sentences ) {
+// writeSentence( jcas, sentence, writer );
+// }
+// } catch ( IOException ioE ) {
+// LOGGER.error( "Could not not write pretty file " + outputFile.getPath() );
+// LOGGER.error( ioE.getMessage() );
+// }
+ LOGGER.info( "Finished processing" );
+ }
+
+ public void writeFile( final JCas jCas, final String outputFilePath ) {
+ try ( final BufferedWriter writer = new BufferedWriter( new FileWriter( outputFilePath ) ) ) {
+ final Collection<Sentence> sentences = JCasUtil.select( jCas, Sentence.class );
for ( Sentence sentence : sentences ) {
- writeSentence( jcas, sentence, writer );
+ writeSentence( jCas, sentence, writer );
}
} catch ( IOException ioE ) {
- LOGGER.error( "Could not not write pretty file " + outputFile.getPath() );
+ LOGGER.error( "Could not not write pretty file " + outputFilePath );
LOGGER.error( ioE.getMessage() );
}
- LOGGER.info( "Finished processing" );
}
/**
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java Wed Mar 1 00:22:32 2017
@@ -2,18 +2,14 @@ package org.apache.ctakes.core.cc.pretty
//import org.apache.log4j.Logger;
+import org.apache.ctakes.core.cc.AbstractOutputFileWriter;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.CASException;
-import org.apache.uima.fit.component.CasConsumer_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
-import static org.apache.ctakes.core.config.ConfigParameterConstants.DESC_OUTPUTDIR;
+import java.io.IOException;
+
import static org.apache.ctakes.core.config.ConfigParameterConstants.PARAM_OUTPUTDIR;
@@ -29,17 +25,15 @@ import static org.apache.ctakes.core.con
* @see org.apache.ctakes.core.cc.pretty.plaintext.PrettyTextWriter
* @since 7/8/2015
*/
-public class PrettyTextWriterFit extends CasConsumer_ImplBase {
-
- @ConfigurationParameter(
- name = PARAM_OUTPUTDIR,
- mandatory = false,
- description = DESC_OUTPUTDIR,
- defaultValue = ""
- )
- private String fitOutputDirectoryPath;
+@PipeBitInfo(
+ name = "Pretty Text Writer",
+ description = "Writes text files with document text and simple markups (POS, Semantic Group, CUI, Negation).",
+ role = PipeBitInfo.Role.WRITER,
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
+final public class PrettyTextWriterFit extends AbstractOutputFileWriter {
-// static private final Logger LOGGER = Logger.getLogger( "PrettyTextWriterFit" );
// delegate
final private PrettyTextWriter _prettyTextWriter;
@@ -53,52 +47,59 @@ public class PrettyTextWriterFit extends
* {@inheritDoc}
*/
@Override
- public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException {
- super.initialize( uimaContext );
- try {
- if ( fitOutputDirectoryPath != null ) {
- _prettyTextWriter.setOutputDirectory( fitOutputDirectoryPath );
- } else {
- _prettyTextWriter.setOutputDirectory( (String)uimaContext.getConfigParameterValue( PARAM_OUTPUTDIR ) );
- }
- } catch ( IllegalArgumentException | SecurityException multE ) {
- // thrown if the path specifies a File (not Dir) or by file system access methods
- throw new ResourceInitializationException( multE );
- }
+ public void writeFile( final JCas jCas,
+ final String outputDir,
+ final String documentId,
+ final String fileName ) throws IOException {
+ _prettyTextWriter.writeFile( jCas, outputDir + "/" + fileName );
}
-
/**
* {@inheritDoc}
*/
@Override
- public void process( final CAS aCAS ) throws AnalysisEngineProcessException {
- JCas jcas;
+ public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException {
+ super.initialize( uimaContext );
try {
- jcas = aCAS.getJCas();
- } catch ( CASException casE ) {
- throw new AnalysisEngineProcessException( casE );
+ _prettyTextWriter.setOutputDirectory( (String)uimaContext.getConfigParameterValue( PARAM_OUTPUTDIR ) );
+ } catch ( IllegalArgumentException | SecurityException multE ) {
+ // thrown if the path specifies a File (not Dir) or by file system access methods
+ throw new ResourceInitializationException( multE );
}
- _prettyTextWriter.process( jcas );
}
- /**
- * @return This Cas Consumer as an Analysis Engine
- * @throws org.apache.uima.resource.ResourceInitializationException if anything went wrong
- */
- static public AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
- return createAnnotatorDescription( "" );
- }
- /**
- * @param outputDirectoryPath may be empty or null, in which case the current working directory is used
- * @return This Cas Consumer as an Analysis Engine
- * @throws org.apache.uima.resource.ResourceInitializationException if anything went wrong
- */
- static public AnalysisEngineDescription createAnnotatorDescription( final String outputDirectoryPath )
- throws ResourceInitializationException {
- return AnalysisEngineFactory.createEngineDescription( PrettyTextWriterFit.class,
- PARAM_OUTPUTDIR, outputDirectoryPath );
- }
+// /**
+// * {@inheritDoc}
+// */
+// @Override
+// public void process( final CAS aCAS ) throws AnalysisEngineProcessException {
+// JCas jcas;
+// try {
+// jcas = aCAS.getJCas();
+// } catch ( CASException casE ) {
+// throw new AnalysisEngineProcessException( casE );
+// }
+// _prettyTextWriter.process( jcas );
+// }
+//
+// /**
+// * @return This Cas Consumer as an Analysis Engine
+// * @throws org.apache.uima.resource.ResourceInitializationException if anything went wrong
+// */
+// static public AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+// return createAnnotatorDescription( "" );
+// }
+//
+// /**
+// * @param outputDirectoryPath may be empty or null, in which case the current working directory is used
+// * @return This Cas Consumer as an Analysis Engine
+// * @throws org.apache.uima.resource.ResourceInitializationException if anything went wrong
+// */
+// static public AnalysisEngineDescription createAnnotatorDescription( final String outputDirectoryPath )
+// throws ResourceInitializationException {
+// return AnalysisEngineFactory.createEngineDescription( PrettyTextWriterFit.class,
+// PARAM_OUTPUTDIR, outputDirectoryPath );
+// }
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.cc.pretty.plaintext;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
@@ -20,6 +21,13 @@ import static org.apache.ctakes.core.con
* @see org.apache.ctakes.core.cc.pretty.plaintext.PrettyTextWriter
* @since 6/24/2015
*/
+@PipeBitInfo(
+ name = "Pretty Text Writer (UIMA)",
+ description = "Writes text files with document text and simple markups (POS, Semantic Group, CUI, Negation).",
+ role = PipeBitInfo.Role.WRITER,
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
final public class PrettyTextWriterUima extends CasConsumer_ImplBase {
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.cc.property.plaintext;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -25,6 +26,13 @@ import static org.apache.ctakes.core.con
* @version %I%
* @since 10/15/2015
*/
+@PipeBitInfo(
+ name = "Property Text Writer",
+ description = "Writes text files with lists of annotations and properties (POS, Semantic Group, CUI, Negation).",
+ role = PipeBitInfo.Role.WRITER,
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class PropertyTextWriterFit extends CasConsumer_ImplBase {
@ConfigurationParameter(
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java Wed Mar 1 00:22:32 2017
@@ -1,5 +1,6 @@
package org.apache.ctakes.core.cc.property.plaintext;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
@@ -20,6 +21,13 @@ import static org.apache.ctakes.core.con
* @version %I%
* @since 10/15/2015
*/
+@PipeBitInfo(
+ name = "Property Text Writer (UIMA)",
+ description = "Writes text files with lists of annotations and properties (POS, Semantic Group, CUI, Negation).",
+ role = PipeBitInfo.Role.WRITER,
+ input = PipeBitInfo.POPULATED_JCAS,
+ output = PipeBitInfo.NO_OUTPUT
+)
public class PropertyTextWriterUima extends CasConsumer_ImplBase {
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionCyclicalReads.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionCyclicalReads.java?rev=1784840&r1=1784839&r2=1784840&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionCyclicalReads.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionCyclicalReads.java Wed Mar 1 00:22:32 2017
@@ -43,17 +43,23 @@ package org.apache.ctakes.core.cr;
* gather files in the directory from sub directories.
*/
-import java.io.IOException;
-
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
+import java.io.IOException;
+
-public class FilesInDirectoryCollectionCyclicalReads extends FilesInDirectoryCollectionReader
+@PipeBitInfo(
+ name = "Files in Dir Cycle Reader",
+ description = "Reads document texts from text files in a directory, repeating for a number of iterations.",
+ role = PipeBitInfo.Role.READER,
+ output = PipeBitInfo.NEW_JCAS
+)
+public class FilesInDirectoryCollectionCyclicalReads extends FilesInDirectoryCollectionReader
{
/**
* Name of configuration parameter that must be set to the path of