You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2022/12/16 23:22:48 UTC
[ctakes] branch master updated: Path updates in temporal; Assorted minor updates; Got rid of some compile warnings
This is an automated email from the ASF dual-hosted git repository.
seanfinan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ctakes.git
The following commit(s) were added to refs/heads/master by this push:
new 7f5ec0e Path updates in temporal; Assorted minor updates; Got rid of some compile warnings
7f5ec0e is described below
commit 7f5ec0e97d2afaec512a2298728f6cc92a253cd5
Author: Sean Finan <se...@childrens.harvard.edu>
AuthorDate: Fri Dec 16 18:09:35 2022 -0500
Path updates in temporal
Assorted minor updates
Got rid of some compile warnings
---
.../assertion/cr/GoldEntityAndAttributeReader.java | 78 ++++---
.../assertion/cr/MiPACQKnowtatorXMLReader.java | 99 ++++-----
.../ctakes/assertion/cr/NegExCorpusReader.java | 59 +++---
.../eval/AnnotationStatisticsCompact.java | 46 +++--
.../ctakes/assertion/eval/AssertionEvaluation.java | 29 +--
.../cleartk/CreateAssertionDescriptor.java | 19 +-
.../medfacts/cleartk/TrainAssertionModel.java | 15 +-
.../WindowedAssertionCleartkAnalysisEngine.java | 20 +-
.../windowed/context/AbstractWindowedContext.java | 2 +-
.../GoldEntityAndAttributeReaderPipeline.java | 106 +++++-----
...ityAndAttributeReaderPipelineForSeedCorpus.java | 89 ++++----
.../clinicalpipeline/ClinicalPipelineWithUmls.java | 8 +-
.../org/apache/ctakes/core/cr/FileTreeReader.java | 2 +-
.../coreference/ae/EventCoreferenceAnnotator.java | 3 +-
.../lookup/fast/pipeline/DictionarySubPipe.piper | 2 -
.../lookup/fast/pipeline/TsDictionarySubPipe.piper | 2 -
ctakes-distribution/src/main/bin/README | 12 ++
.../apache/ctakes/dockhand/gui/feature/Option.java | 3 +-
.../examples/pipeline/MultiThreadedPipeline.java | 13 +-
.../ctakes/examples/pipeline/BigPipeline.piper | 19 +-
.../apache/ctakes/gui/component/SmoothToolTip.java | 7 +-
.../gui/dictionary/DictionaryDownloader.java | 5 +-
.../ctakes/lvg/ae/LvgBaseTokenAnnotator.java | 225 +++++++++------------
.../preprocessor/ClinicalNotePreProcessor.java | 92 ++++-----
.../ctakes/preprocessor/DocumentMetaData.java | 40 ++--
.../ae/RelationExtractorAnnotator.java | 14 +-
.../ctakes/temporal/ae/BackwardsTimeAnnotator.java | 2 +-
...cutiveSentencesEventEventRelationAnnotator.java | 7 +-
...ecutiveSentencesEventTimeRelationAnnotator.java | 3 +-
.../ae/EventEventCRFRelationAnnotator.java | 51 ++---
.../ae/EventEventI2B2RelationAnnotator.java | 3 +-
.../EventEventRelationGoldContainerAnnotator.java | 3 +-
.../ae/EventEventRelationSeedBasedAnnotator.java | 3 +-
.../ae/EventTimeI2B2RelationAnnotator.java | 3 +-
.../temporal/ae/EventTimeRelationAnnotator.java | 5 +-
.../ae/EventTimeSelfRelationAnnotator.java | 3 +-
.../ae/TemporalRelationExtractorAnnotator.java | 15 +-
.../temporal/ae/baselines/TreeHeightBaseline.java | 3 +-
.../feature/CheckSpecialWordRelationExtractor.java | 133 ++++++++----
.../temporal/ae/feature/TimeWordTypeExtractor.java | 54 +++--
40 files changed, 703 insertions(+), 594 deletions(-)
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java
index c2f8eaf..aee138a 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/GoldEntityAndAttributeReader.java
@@ -20,6 +20,7 @@ package org.apache.ctakes.assertion.cr;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.Mapper;
+import org.apache.ctakes.core.util.annotation.SemanticGroup;
import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
@@ -33,7 +34,6 @@ import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.jdom.Document;
-import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import java.io.File;
@@ -63,7 +63,7 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
public static String inputDirectory;
// counter for assigning entity ids
public int identifiedAnnotationId;
- private boolean VERBOSE = true;
+// private final boolean VERBOSE = true;
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
@@ -88,13 +88,12 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
Document document;
try {
document = builder.build(new File(goldFilePath));
- } catch (JDOMException e) {
+ } catch ( Exception e) {
throw new AnalysisEngineProcessException(e);
- } catch (Exception e) { // TODO this should be IOException, but the command-line maven build was breaking
- throw new AnalysisEngineProcessException(e);
- }
+ }// TODO this should be IOException, but the command-line maven build was breaking
+
- // map knowtator mention ids to entity offsets
+ // map knowtator mention ids to entity offsets
HashMap<String, ArrayList<Span>> allMentions = XMLReader.getEntityMentions(document);
// map knowtator mention ids to entity types
HashMap<String, String> entityTypes = XMLReader.getEntityTypes(document);
@@ -145,7 +144,7 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
|| type==CONST.NE_TYPE_ID_DRUG
|| type==CONST.NE_TYPE_ID_FINDING
|| type==CONST.NE_TYPE_ID_PROCEDURE
- || type==CONST.NE_TYPE_ID_ANATOMICAL_SITE
+// || type==CONST.NE_TYPE_ID_ANATOMICAL_SITE
) {
eMention = new EventMention(initView, first.start, last.end);
} else {
@@ -153,7 +152,8 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
}
// set easy attributes
- eMention.setTypeID(Mapper.getEntityTypeId(entityTypes.get(mentionId)));
+// eMention.setTypeID(Mapper.getEntityTypeId(entityTypes.get(mentionId)));
+ eMention.setTypeID( SemanticGroup.getGroup( entityTypes.get( mentionId ) ).getCode() );
eMention.setId(identifiedAnnotationId++);
eMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
eMention.setConfidence(1);
@@ -163,7 +163,7 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
}
List<ArgumentInfo> assocAttributes = getLeafAttributes(mentionId,
- mentionAttr,attrPtr,attrs,new ArrayList<ArgumentInfo>());
+ mentionAttr,attrPtr,attrs,new ArrayList<>());
for (ArgumentInfo a : assocAttributes) {
@@ -197,12 +197,12 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
// assumes that if you're in an attribute mention, you only have one value
if (attrPtr.containsKey(attrId)) {
ArgumentInfo a = attrPtr.get(attrId);
- if ( !isRelationArgument(attrPtr.get(attrId).role) )
+ if ( notRelationArgument( attrPtr.get( attrId ).role ) )
getLeafAttributes(attrPtr.get(attrId).value, mentionAttr, attrPtr, attrs, output);
} else if (attrs.containsKey(attrId)){
ArgumentInfo a = attrs.get(attrId);
- if ( !isRelationArgument(attrs.get(attrId).role) )
+ if ( notRelationArgument( attrs.get( attrId ).role ) )
output.add(attrs.get(attrId));
}
}
@@ -222,22 +222,20 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
return output;
}
- private boolean isRelationArgument(String role) {
+ private boolean notRelationArgument( String role ) {
if (normalizeRoleName(role).equals("Related_to")) {
- return true;
- } else if (normalizeRoleName(role).equals("Argument")) {
- return true;
+ return false;
}
- return false;
+ return !normalizeRoleName( role ).equals( "Argument" );
}
private void checkForAttrValue(IdentifiedAnnotation eMention, String role,
String value) {
if (role.contains("_normalization")) {
if (role.startsWith("conditional")) {
- eMention.setConditional(Boolean.valueOf(value));
+ eMention.setConditional(Boolean.parseBoolean( value ) );
} else if (role.startsWith("generic")) {
- eMention.setGeneric(Boolean.valueOf(value));
+ eMention.setGeneric(Boolean.parseBoolean( value ) );
} else if (role.startsWith("negation_indicator")) {
// assumes that the string from Knowtator is exactly "negation_present"
if (value.equals("negation_present")) {
@@ -255,8 +253,8 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
} else {
eMention.setUncertainty(CONST.NE_UNCERTAINTY_ABSENT);
}
- } else if (role.startsWith("generic")) {
- eMention.setGeneric(Boolean.valueOf(value));
+// } else if (role.startsWith("generic")) {
+// eMention.setGeneric(Boolean.valueOf(value));
}
}
}
@@ -266,7 +264,7 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
private HashMap<String, ArrayList<Span>> filterToNamedEntitiesOnly(
HashMap<String, ArrayList<Span>> entityMentions,
HashMap<String, String> entityTypes) {
- HashMap<String, ArrayList<Span>> newEntityMentions = new HashMap<String, ArrayList<Span>>();
+ HashMap<String, ArrayList<Span>> newEntityMentions = new HashMap<>();
for (Entry<String, String> etype : entityTypes.entrySet()) {
if (etype.getValue().equals("Anatomical_site")
@@ -287,24 +285,24 @@ public class GoldEntityAndAttributeReader extends JCasAnnotator_ImplBase {
// Takes the Knowtator schema value and filters out things that are not NE.
// In principle can have a parallel "filterToAttributesOnly"
- private boolean filterToNamedEntitiesOnly(
- HashMap<String, ArrayList<Span>> entityMentions,
- String typeKey, String typeValue) {
-
- if (typeValue.toLowerCase().equals("Anatomical_site")
- || typeValue.toLowerCase().equals("Disease_Disorder")
- || typeValue.toLowerCase().equals("Lab")
- || typeValue.toLowerCase().equals("Medications")
- || typeValue.toLowerCase().equals("Procedure")
- || typeValue.toLowerCase().equals("Sign_symptom")
- ) {
- if (entityMentions.containsKey(typeKey)) {
- return true;
- }
- }
-
- return false;
- }
+// private boolean filterToNamedEntitiesOnly(
+// HashMap<String, ArrayList<Span>> entityMentions,
+// String typeKey, String typeValue) {
+// // Note: Nothing toLowerCase() will ever match another string with UpperCase Characters!
+// if (typeValue.toLowerCase().equals("Anatomical_site")
+// || typeValue.toLowerCase().equals("Disease_Disorder")
+// || typeValue.toLowerCase().equals("Lab")
+// || typeValue.toLowerCase().equals("Medications")
+// || typeValue.toLowerCase().equals("Procedure")
+// || typeValue.toLowerCase().equals("Sign_symptom")
+// ) {
+// if (entityMentions.containsKey(typeKey)) {
+// return true;
+// }
+// }
+//
+// return false;
+// }
/**
* Convert Argument_CU and Related_to_CU to Argument and Related_to.
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/MiPACQKnowtatorXMLReader.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/MiPACQKnowtatorXMLReader.java
index 681b01d..2841018 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/MiPACQKnowtatorXMLReader.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/MiPACQKnowtatorXMLReader.java
@@ -22,16 +22,12 @@ import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
import org.apache.ctakes.assertion.util.AssertionConst;
-import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
+import org.apache.ctakes.core.cc.FileTreeXmiWriter;
+import org.apache.ctakes.core.config.ConfigParameterConstants;
+import org.apache.ctakes.core.cr.FileTreeReader;
import org.apache.ctakes.core.knowtator.KnowtatorAnnotation;
import org.apache.ctakes.core.knowtator.KnowtatorXMLParser;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
@@ -113,10 +109,6 @@ import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.jcas.tcas.Annotation;
import org.jdom2.JDOMException;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import com.google.common.io.Files;
/**
* assumes knowtator xml files are in "exported-xml" subdirectory w/ train/dev/test subsubdirs
@@ -148,7 +140,8 @@ public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
private static final Map<String, String> SUBJECT_KNOWTATOR_TO_UIMA_MAP;
static {
- SUBJECT_KNOWTATOR_TO_UIMA_MAP = Maps.newHashMap();
+// SUBJECT_KNOWTATOR_TO_UIMA_MAP = Maps.newHashMap();
+ SUBJECT_KNOWTATOR_TO_UIMA_MAP = new HashMap<>();
SUBJECT_KNOWTATOR_TO_UIMA_MAP.put("C0030705", CONST.ATTR_SUBJECT_PATIENT);
SUBJECT_KNOWTATOR_TO_UIMA_MAP.put("patient", CONST.ATTR_SUBJECT_PATIENT);
SUBJECT_KNOWTATOR_TO_UIMA_MAP.put("family_member", CONST.ATTR_SUBJECT_FAMILY_MEMBER);
@@ -224,8 +217,7 @@ public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
// }
// URI relUri = new URI("../../exported-xml/"+lastDir+"/"+file); // relative to text directory
// URI newUri = this.getTextURI(jCas).resolve(relUri);
- URI newUri = new URI(newPath);
- return newUri;
+ return new URI( newPath);
} catch (URISyntaxException e) {
throw new AnalysisEngineProcessException(e);
}
@@ -249,31 +241,30 @@ public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
private static List<String> getDiseaseDisorderKnowtatorClasses() {
- return Arrays.asList(new String [] {"Disorders"});
+ return Collections.singletonList( "Disorders" );
}
private static List<String> getSignSymptomKnowtatorClasses() {
- return Arrays.asList(new String [] {"Sign_Symptom", "Finding"});
+ return Arrays.asList( "Sign_Symptom", "Finding" );
}
private static List<String> getProcedureKnowtatorClasses() {
- return Arrays.asList(new String [] {
- "Diagnostic_procedure",
- "Laboratory_procedure",
- "Procedures",
- "Therapeutic_or_preventive_procedure",
- "Intervention",
- "Health_care_activity",
- "Research_activity"});
+ return Arrays.asList( "Diagnostic_procedure",
+ "Laboratory_procedure",
+ "Procedures",
+ "Therapeutic_or_preventive_procedure",
+ "Intervention",
+ "Health_care_activity",
+ "Research_activity" );
}
private static List<String> getMedicationKnowtatorClasses() {
- return Arrays.asList(new String [] {"Chemicals_and_drugs", "Pharmacologic_substance"});
+ return Arrays.asList( "Chemicals_and_drugs", "Pharmacologic_substance" );
}
private static List<String> getAnatomyKnowtatorClasses() {
- return Arrays.asList(new String [] {"Anatomy"});
+ return Collections.singletonList( "Anatomy" );
}
@Override
@@ -301,7 +292,8 @@ public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
}
- Set<String> nonAnnotationTypes = Sets.newHashSet(); // those expected not to have spans
+// Set<String> nonAnnotationTypes = Sets.newHashSet(); // those expected not to have spans
+ Set<String> nonAnnotationTypes = new HashSet<>(); // those expected not to have spans
// create a CAS object for each annotation
Map<String, TOP> idAnnotationMap = new HashMap<>();
@@ -676,14 +668,14 @@ public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
} else if ("conditional_class".equals(annotation.type)) {
Boolean value = booleanSlots.remove("conditional_normalization");
ConditionalModifier modifier = new ConditionalModifier(jCas, coveringSpan.begin, coveringSpan.end);
- modifier.setConditional(value == null ? false : value);
+ modifier.setConditional( value != null && value );
modifier.addToIndexes();
idAnnotationMap.put(annotation.id, modifier);
} else if ("generic_class".equals(annotation.type)) {
Boolean value = booleanSlots.remove("generic_normalization");
GenericModifier modifier = new GenericModifier(jCas, coveringSpan.begin, coveringSpan.end);
- modifier.setGeneric(value == null ? false : value);
+ modifier.setGeneric( value != null && value );
modifier.addToIndexes();
idAnnotationMap.put(annotation.id, modifier);
@@ -1158,7 +1150,7 @@ public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
Boolean negation = booleanSlots.remove("negation");
mention.setPolarity(negation == null
? CONST.NE_POLARITY_NEGATION_ABSENT
- : negation == true ? CONST.NE_POLARITY_NEGATION_PRESENT : CONST.NE_POLARITY_NEGATION_ABSENT);
+ : negation ? CONST.NE_POLARITY_NEGATION_PRESENT : CONST.NE_POLARITY_NEGATION_ABSENT );
// add features for conditional, generic, etc.
KnowtatorAnnotation conditional = annotationSlots.remove("conditional_CU");
@@ -1185,15 +1177,19 @@ public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
// convert status as necessary
String status = stringSlots.remove("Status");
if (status != null) {
- if ("HistoryOf".equals(status)) {
- mention.setHistoryOf(CONST.NE_HISTORY_OF_PRESENT);
- } else if ("FamilyHistoryOf".equals(status)) {
- mention.setHistoryOf(CONST.NE_HISTORY_OF_PRESENT);
- mention.setSubject(CONST.ATTR_SUBJECT_FAMILY_MEMBER);
- } else if ("Possible".equals(status)) {
- mention.setUncertainty(CONST.NE_CERTAINTY_NEGATED);
- } else {
- throw new UnsupportedOperationException("Unknown status: " + status);
+ switch ( status ) {
+ case "HistoryOf":
+ mention.setHistoryOf( CONST.NE_HISTORY_OF_PRESENT );
+ break;
+ case "FamilyHistoryOf":
+ mention.setHistoryOf( CONST.NE_HISTORY_OF_PRESENT );
+ mention.setSubject( CONST.ATTR_SUBJECT_FAMILY_MEMBER );
+ break;
+ case "Possible":
+ mention.setUncertainty( CONST.NE_CERTAINTY_NEGATED );
+ break;
+ default:
+ throw new UnsupportedOperationException( "Unknown status: " + status );
}
}
@@ -1284,9 +1280,11 @@ public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
private static class DelayedRelationFeature extends DelayedFeature {
- private Class<? extends BinaryTextRelation> relationClass;
- private Annotation arg1, arg2;
- private Class<? extends Annotation> arg1Class, arg2Class;
+ private final Class<? extends BinaryTextRelation> relationClass;
+ private final Annotation arg1;
+ private final Annotation arg2;
+ private final Class<? extends Annotation> arg1Class;
+ private final Class<? extends Annotation> arg2Class;
public DelayedRelationFeature(
Annotation annotation,
@@ -1403,22 +1401,27 @@ public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
AnalysisEngine mipacqReader = AnalysisEngineFactory.createEngine(MiPACQKnowtatorXMLReader.class);
AnalysisEngine xWriter = AnalysisEngineFactory.createEngine(
- XmiWriterCasConsumerCtakes.class,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// XmiWriterCasConsumerCtakes.class,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+ FileTreeXmiWriter.class,
+ ConfigParameterConstants.PARAM_OUTPUTDIR,
"/usr/data/MiPACQ/cTAKES-xmi/");
int n = dirs.length;
LOGGER.info("Processing " + n + " directories of knowtator xml files.");
+ final FileTreeReader reader = new FileTreeReader();
for (String knowtatorTextDirectoryPath : dirs) {
//File knowtatorXmlDirectory = new File(knowtatorTextDirectoryPath, "exported-xml");
File knowtatorTextSourceDirectory = new File(knowtatorTextDirectoryPath, "text");
File [] knowtatorTextSourceFiles = knowtatorTextSourceDirectory.listFiles();
- int i = knowtatorTextSourceFiles.length;
+ assert knowtatorTextSourceFiles != null;
+ int i = knowtatorTextSourceFiles.length;
LOGGER.info("Processing " + i + " knowtator text source files for this directory.");
for (File textFile : knowtatorTextSourceFiles) {
JCas jCas = mipacqReader.newJCas();
- jCas.setDocumentText(Files.toString(textFile, Charsets.US_ASCII));
- DocumentID documentID = new DocumentID(jCas);
+// jCas.setDocumentText(Files.toString(textFile, Charsets.US_ASCII));
+ jCas.setDocumentText( reader.readFile( textFile ) );
+ DocumentID documentID = new DocumentID(jCas);
documentID.setDocumentID(textFile.toURI().toString());
documentID.addToIndexes();
mipacqReader.process(jCas);
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java
index 58dcb43..99cdaba 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -20,21 +20,17 @@ package org.apache.ctakes.assertion.cr;
import org.apache.ctakes.assertion.medfacts.cleartk.AssertionComponents;
import org.apache.ctakes.assertion.util.AssertionConst;
-import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
+import org.apache.ctakes.core.cc.FileTreeXmiWriter;
+import org.apache.ctakes.core.config.ConfigParameterConstants;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.pipeline.PipelineBuilder;
import org.apache.ctakes.typesystem.type.structured.DocumentID;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.log4j.Logger;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
-import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.collection.CollectionReader_ImplBase;
-import org.apache.uima.fit.factory.AggregateBuilder;
-import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.CollectionReaderFactory;
-import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.Progress;
@@ -63,7 +59,7 @@ public class NegExCorpusReader extends CollectionReader_ImplBase {
private boolean skipReadingValuesJustReadText = false;
- private static TypeSystemDescription typeSystemDescription = AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION; // TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath();//.createTypeSystemDescription();
+ private static final TypeSystemDescription typeSystemDescription = AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION; // TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath();//.createTypeSystemDescription();
public NegExCorpusReader() {
@@ -89,7 +85,7 @@ public class NegExCorpusReader extends CollectionReader_ImplBase {
if (n==0) LOGGER.error(n + " lines found in " + filename);
LOGGER.info("Processing " + n + " lines from the negex file, treating each line as a document.");
- list = new ArrayList<NegExAnnotation>();
+ list = new ArrayList<>();
for (String data : lines) {
LOGGER.info("Processing line '" + data + "'.");
try {
@@ -131,30 +127,35 @@ public class NegExCorpusReader extends CollectionReader_ImplBase {
//CollectionReader negexReader = new NegExCorpusReader(false);
//List<NegExAnnotation> list = readAndParseAllLines(filename);
- CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescription(
- NegExCorpusReader.class,
- typeSystemDescription
- );
-
- //TypeSystemDescription typeSystemDescription = AssertionComponents.TYPE_SYSTEM_DESCRIPTION; // TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath();//.createTypeSystemDescription();
- AnalysisEngineDescription xWriter = AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
- AssertionConst.NEGEX_CORPUS_PREPROCESSED
- );
-
- AggregateBuilder aggregate = new AggregateBuilder();
- aggregate.add(xWriter);
-
- SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
-
-
+// CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescription(
+// NegExCorpusReader.class,
+// typeSystemDescription
+// );
+//
+// //TypeSystemDescription typeSystemDescription = AssertionComponents.TYPE_SYSTEM_DESCRIPTION; // TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath();//.createTypeSystemDescription();
+// AnalysisEngineDescription xWriter = AnalysisEngineFactory.createEngineDescription(
+// XmiWriterCasConsumerCtakes.class,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// AssertionConst.NEGEX_CORPUS_PREPROCESSED
+// );
+//
+// AggregateBuilder aggregate = new AggregateBuilder();
+// aggregate.add(xWriter);
+//
+// SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
+
+ // This is much simpler than the dozen or so lines above.
+ new PipelineBuilder().reader( NegExCorpusReader.class )
+ .set( ConfigParameterConstants.PARAM_OUTPUTDIR,
+ AssertionConst.NEGEX_CORPUS_PREPROCESSED )
+ .add( FileTreeXmiWriter.class )
+ .run();
}
private static String[] readNonWhiteSpaceLines(String filename) {
- List<String> lines = new ArrayList<String>();
+ List<String> lines = new ArrayList<>();
BufferedReader br = null;
try {
br = new BufferedReader(new FileReader(filename));
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java
index 2a713c0..56d9f41 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java
@@ -54,13 +54,13 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
private static final long serialVersionUID = 1L;
- private Multiset<OUTCOME_TYPE> referenceOutcomes;
+ private final Multiset<OUTCOME_TYPE> referenceOutcomes;
- private Multiset<OUTCOME_TYPE> predictedOutcomes;
+ private final Multiset<OUTCOME_TYPE> predictedOutcomes;
- private Multiset<OUTCOME_TYPE> correctOutcomes;
+ private final Multiset<OUTCOME_TYPE> correctOutcomes;
- private ConfusionMatrix<OUTCOME_TYPE> confusionMatrix;
+ private final ConfusionMatrix<OUTCOME_TYPE> confusionMatrix;
/**
* Creates a {@link Function} that converts an {@link Annotation} into a hashable representation
@@ -117,8 +117,7 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
* Add all statistics together.
*
* This is often useful for combining individual fold statistics that result from methods like
- * {@link Evaluation_ImplBase#crossValidation(List, int)}.
- *
+ *
* @param statistics
* The sequence of statistics that should be combined.
* @return The combination of all the individual statistics.
@@ -160,8 +159,8 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
this.add(
referenceAnnotations,
predictedAnnotations,
- AnnotationStatisticsCompact.<ANNOTATION_TYPE> annotationToSpan(),
- AnnotationStatisticsCompact.<ANNOTATION_TYPE, OUTCOME_TYPE> annotationToNull());
+ AnnotationStatisticsCompact.annotationToSpan(),
+ AnnotationStatisticsCompact.annotationToNull());
}
/**
@@ -208,8 +207,7 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
this.predictedOutcomes.addAll(predictedSpanOutcomes.values());
// determine the outcomes that were correct
- Set<SPAN_TYPE> intersection = new HashSet<>();
- intersection.addAll(referenceSpanOutcomes.keySet());
+ Set<SPAN_TYPE> intersection = new HashSet<>( referenceSpanOutcomes.keySet() );
intersection.retainAll(predictedSpanOutcomes.keySet());
for (SPAN_TYPE span : intersection) {
OUTCOME_TYPE goldOutcome = referenceSpanOutcomes.get(span);
@@ -267,7 +265,8 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
return this.referenceOutcomes.count(outcome);
}
- public int countFalseNegatives(OUTCOME_TYPE... positiveOutcomes) {
+ @SafeVarargs
+ public final int countFalseNegatives( OUTCOME_TYPE... positiveOutcomes ) {
int numReferenceOutcomes = this.countReferenceOutcomes();
int numPredictedOutcomes = this.countPredictedOutcomes();
if (numReferenceOutcomes != numPredictedOutcomes) {
@@ -275,8 +274,8 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
String.format(
"Expected number equal number of references outcomes and predicted outcomes. Had reference outcomes=%d, predicted outcomes=%d",
numReferenceOutcomes,
- numPredictedOutcomes,
- this.countPredictedOutcomes()));
+ numPredictedOutcomes ) );
+// this.countPredictedOutcomes()));
}
int totalFalseNegatives = 0;
for (OUTCOME_TYPE positiveOutcome : positiveOutcomes) {
@@ -286,7 +285,8 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
return totalFalseNegatives;
}
- public int countFalsePositives(OUTCOME_TYPE... positiveOutcomes) {
+ @SafeVarargs
+ public final int countFalsePositives( OUTCOME_TYPE... positiveOutcomes ) {
int numReferenceOutcomes = this.countReferenceOutcomes();
int numPredictedOutcomes = this.countPredictedOutcomes();
if (numReferenceOutcomes != numPredictedOutcomes) {
@@ -294,8 +294,8 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
String.format(
"Expected number equal number of references outcomes and predicted outcomes. Had reference outcomes=%d, predicted outcomes=%d",
numReferenceOutcomes,
- numPredictedOutcomes,
- this.countPredictedOutcomes()));
+ numPredictedOutcomes ) );
+// this.countPredictedOutcomes()));
}
int totalFalsePositives = 0;
for (OUTCOME_TYPE positiveOutcome : positiveOutcomes) {
@@ -306,7 +306,8 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
return totalFalsePositives;
}
- public int countTrueNegatives(OUTCOME_TYPE... positiveOutcomes) {
+ @SafeVarargs
+ public final int countTrueNegatives( OUTCOME_TYPE... positiveOutcomes ) {
int numReferenceOutcomes = this.countReferenceOutcomes();
int numPredictedOutcomes = this.countPredictedOutcomes();
if (numReferenceOutcomes != numPredictedOutcomes) {
@@ -314,8 +315,8 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
String.format(
"Expected number equal number of references outcomes and predicted outcomes. Had reference outcomes=%d, predicted outcomes=%d",
numReferenceOutcomes,
- numPredictedOutcomes,
- this.countPredictedOutcomes()));
+ numPredictedOutcomes ) );
+// this.countPredictedOutcomes()));
}
int totalTrueNegatives = this.countCorrectOutcomes();
@@ -327,7 +328,8 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
}
- public int countTruePositives(OUTCOME_TYPE... positiveOutcomes) {
+ @SafeVarargs
+ public final int countTruePositives( OUTCOME_TYPE... positiveOutcomes ) {
int numReferenceOutcomes = this.countReferenceOutcomes();
int numPredictedOutcomes = this.countPredictedOutcomes();
if (numReferenceOutcomes != numPredictedOutcomes) {
@@ -335,8 +337,8 @@ public class AnnotationStatisticsCompact<OUTCOME_TYPE extends Comparable<? super
String.format(
"Expected number equal number of references outcomes and predicted outcomes. Had reference outcomes=%d, predicted outcomes=%d",
numReferenceOutcomes,
- numPredictedOutcomes,
- this.countPredictedOutcomes()));
+ numPredictedOutcomes ) );
+// this.countPredictedOutcomes()));
}
int totalTruePositives = 0;
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
index d8bb6f7..c1a6099 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
@@ -28,6 +28,7 @@ import org.apache.ctakes.assertion.medfacts.cleartk.*;
import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine.FEATURE_CONFIG;
import org.apache.ctakes.assertion.pipelines.GoldEntityAndAttributeReaderPipelineForSeedCorpus;
import org.apache.ctakes.core.ae.DocumentIdPrinterAnalysisEngine;
+import org.apache.ctakes.core.cc.FileTreeXmiWriter;
import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.syntax.*;
@@ -82,7 +83,7 @@ import java.util.*;
public class AssertionEvaluation extends Evaluation_ImplBase<File, Map<String, AnnotationStatisticsCompact<String>>> {
-private static Logger logger = Logger.getLogger(AssertionEvaluation.class);
+private static final Logger logger = Logger.getLogger( AssertionEvaluation.class );
private static final String YTEX_NEGATION_DESCRIPTOR = "ytex.uima.NegexAnnotator";
@@ -528,7 +529,7 @@ public static void printScore(Map<String, AnnotationStatisticsCompact<String>> m
}
}
- private String[] trainingArguments;
+ private final String[] trainingArguments;
public AssertionEvaluation(
File modelDirectory,
@@ -607,7 +608,7 @@ public static void printScore(Map<String, AnnotationStatisticsCompact<String>> m
}
// Add each assertion Analysis Engine to the pipeline!
- builder.add(AnalysisEngineFactory.createEngineDescription(AlternateCuePhraseAnnotator.class, new Object[]{}));
+ builder.add(AnalysisEngineFactory.createEngineDescription(AlternateCuePhraseAnnotator.class ) );
if (!options.ignorePolarity)
{
@@ -889,14 +890,16 @@ public static void printScore(Map<String, AnnotationStatisticsCompact<String>> m
if (evaluationOutputDirectory != null){
String sourceFileName = DocIdUtil.getDocumentID( jCas );
- CasIOUtil.writeXmi(jCas, new File(evaluationOutputDirectory, sourceFileName + ".xmi"));
+// CasIOUtil.writeXmi(jCas, new File(evaluationOutputDirectory, sourceFileName + ".xmi"));
+ new FileTreeXmiWriter().writeFile( jCas, evaluationOutputDirectory.getAbsolutePath(),
+ sourceFileName, sourceFileName );
}
if (!options.ignorePolarity)
{
polarityStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
- AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToSpan(),
- AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToFeatureValue("polarity"));
+ AnnotationStatisticsCompact.annotationToSpan(),
+ AnnotationStatisticsCompact.annotationToFeatureValue( "polarity" ) );
if(options.printErrors){
printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "polarity", CONST.NE_POLARITY_NEGATION_PRESENT, Integer.class);
}
@@ -938,8 +941,8 @@ public static void printScore(Map<String, AnnotationStatisticsCompact<String>> m
if (!options.ignoreGeneric)
{
genericStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
- AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToSpan(),
- AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToFeatureValue("generic"));
+ AnnotationStatisticsCompact.annotationToSpan(),
+ AnnotationStatisticsCompact.annotationToFeatureValue( "generic" ) );
if(options.printErrors){
printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "generic", CONST.NE_GENERIC_TRUE, Boolean.class);
}
@@ -949,8 +952,8 @@ public static void printScore(Map<String, AnnotationStatisticsCompact<String>> m
if (!options.ignoreHistory)
{
historyStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
- AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToSpan(),
- AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToFeatureValue("historyOf"));
+ AnnotationStatisticsCompact.annotationToSpan(),
+ AnnotationStatisticsCompact.annotationToFeatureValue("historyOf"));
if(options.printErrors){
printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "historyOf", CONST.NE_HISTORY_OF_PRESENT, Integer.class);
}
@@ -1110,7 +1113,7 @@ private static void printErrors(JCas jCas,
private static void printInstances(JCas jCas,
Collection<IdentifiedAnnotation> goldEntitiesAndEvents,
Collection<IdentifiedAnnotation> systemEntitiesAndEvents, String classifierType,
- Object trueCategory, Class<? extends Object> categoryClass,
+ Object trueCategory, Class<?> categoryClass,
File outputfile)
throws ResourceProcessException, IOException {
@@ -1204,8 +1207,8 @@ private static void printInstances(JCas jCas,
}
}
}
- private static Object getFeatureValue(Feature feature,
- Class<? extends Object> class1, Annotation annotation) throws ResourceProcessException {
+ private static Object getFeatureValue( Feature feature,
+ Class<?> class1, Annotation annotation ) throws ResourceProcessException {
if(class1 == Integer.class){
return annotation.getIntValue(feature);
}else if(class1 == String.class){
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/CreateAssertionDescriptor.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/CreateAssertionDescriptor.java
index f79ba9e..fbcb371 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/CreateAssertionDescriptor.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/CreateAssertionDescriptor.java
@@ -27,7 +27,8 @@ import org.apache.ctakes.assertion.eval.AssertionEvaluation;
import org.apache.ctakes.assertion.eval.AssertionEvaluation.ReferenceAnnotationsSystemAssertionClearer;
import org.apache.ctakes.assertion.eval.AssertionEvaluation.ReferenceIdentifiedAnnotationsSystemToGoldCopier;
import org.apache.ctakes.assertion.eval.AssertionEvaluation.ReferenceSupportingAnnotationsSystemToGoldCopier;
-import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
+import org.apache.ctakes.core.cc.FileTreeXmiWriter;
+import org.apache.ctakes.core.config.ConfigParameterConstants;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -43,10 +44,10 @@ public class CreateAssertionDescriptor
// public static final Class<? extends DataWriterFactory<String>> dataWriterFactoryClass = MaxentStringOutcomeDataWriter.class;
/**
- * @param args
- * @throws URISyntaxException
- * @throws FileNotFoundException
- * @throws ResourceInitializationException
+ * @param args -
+ * @throws URISyntaxException -
+ * @throws FileNotFoundException -
+ * @throws ResourceInitializationException -
*/
public static void main(String[] args) throws Exception
{
@@ -250,9 +251,11 @@ public class CreateAssertionDescriptor
AnalysisEngineDescription xwriter =
AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// XmiWriterCasConsumerCtakes.class,
+// AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+ FileTreeXmiWriter.class,
+ ConfigParameterConstants.PARAM_OUTPUTDIR,
testOutputDirectory);
builder.add(xwriter);
////
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/TrainAssertionModel.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/TrainAssertionModel.java
index 99bddb4..7367fe8 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/TrainAssertionModel.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/TrainAssertionModel.java
@@ -31,7 +31,9 @@ import org.apache.commons.cli.ParseException;
import org.apache.ctakes.assertion.eval.AssertionEvaluation;
import org.apache.ctakes.assertion.eval.AssertionEvaluation.ReferenceAnnotationsSystemAssertionClearer;
import org.apache.ctakes.assertion.eval.AssertionEvaluation.ReferenceIdentifiedAnnotationsSystemToGoldCopier;
+import org.apache.ctakes.core.cc.FileTreeXmiWriter;
import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
+import org.apache.ctakes.core.config.ConfigParameterConstants;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReader;
@@ -41,15 +43,11 @@ import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.factory.ConfigurationParameterFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.fit.testing.util.HideOutput;
-import org.cleartk.ml.CleartkAnnotator;
-import org.cleartk.ml.DataWriterFactory;
import org.cleartk.ml.jar.DefaultDataWriterFactory;
import org.cleartk.ml.jar.DirectoryDataWriterFactory;
import org.cleartk.ml.jar.GenericJarClassifierFactory;
import org.cleartk.ml.opennlp.maxent.MaxentStringOutcomeDataWriter;
import org.cleartk.util.cr.XReader;
-//import org.junit.Test;
-//import edu.mayo.bmi.uima.core.type.textsem.EntityMention;
public class TrainAssertionModel {
@@ -239,7 +237,6 @@ public class TrainAssertionModel {
} catch (Exception e)
{
logger.error("Some exception happened while training or decoding...", e);
- return;
}
}
@@ -371,9 +368,11 @@ public class TrainAssertionModel {
//taggerDescription,
decodingAggregateDescription,
AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// XmiWriterCasConsumerCtakes.class,
+ FileTreeXmiWriter.class,
+// AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+ ConfigParameterConstants.PARAM_OUTPUTDIR,
decodingOutputDirectory));
logger.info("finished decoding.");
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/WindowedAssertionCleartkAnalysisEngine.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/WindowedAssertionCleartkAnalysisEngine.java
index 70fb418..ef1ca8c 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/WindowedAssertionCleartkAnalysisEngine.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/WindowedAssertionCleartkAnalysisEngine.java
@@ -194,8 +194,9 @@ public abstract class WindowedAssertionCleartkAnalysisEngine extends
String domainId = normalizeToDomain( dir );
File dataDir = new File( dir );
- if ( dataDir.listFiles() != null ) {
- for ( File f : dataDir.listFiles() ) {
+ final File[] dataFiles = dataDir.listFiles();
+ if ( dataFiles != null ) {
+ for ( File f : dataFiles ) {
fileToDomain.put( FilenameUtils.removeExtension( f.getName() ), domainId );
}
}
@@ -322,14 +323,17 @@ public abstract class WindowedAssertionCleartkAnalysisEngine extends
= new TreeMap<>( Comparator.comparingInt( Sentence::getBegin ) );
sentenceTreeMap.putAll( sentenceAnnotationMap );
// History needs full list of sentences
+ final List<Sentence> sentenceList = new ArrayList<>(sentenceTreeMap.keySet() );
for ( FeatureExtractor1<IdentifiedAnnotation> extractor : this.entityFeatureExtractors ) {
if ( extractor instanceof AbstractWindowedFeatureExtractor1 ) {
- ((AbstractWindowedFeatureExtractor1)extractor).setSentences( new ArrayList<>( sentenceTreeMap.keySet() ) );
+// ((AbstractWindowedFeatureExtractor1)extractor).setSentences( new ArrayList<>( sentenceTreeMap.keySet() ) );
+ ((AbstractWindowedFeatureExtractor1)extractor).setSentences( sentenceList );
}
}
for ( FeatureExtractor1<IdentifiedAnnotation> extractor : this.entityTreeExtractors ) {
if ( extractor instanceof AbstractWindowedFeatureExtractor1 ) {
- ((AbstractWindowedFeatureExtractor1)extractor).setSentences( new ArrayList<>( sentenceTreeMap.keySet() ) );
+// ((AbstractWindowedFeatureExtractor1)extractor).setSentences( new ArrayList<>( sentenceTreeMap.keySet() ) );
+ ((AbstractWindowedFeatureExtractor1)extractor).setSentences( sentenceList );
}
}
@@ -501,8 +505,8 @@ public abstract class WindowedAssertionCleartkAnalysisEngine extends
/**
* Looks in the domain string (path) for meaningful corpus names
*
- * @param dir
- * @return
+ * @param dir -
+ * @return -
*/
public static String normalizeToDomain( String dir ) {
// TODO: real normalization
@@ -511,8 +515,8 @@ public abstract class WindowedAssertionCleartkAnalysisEngine extends
Collections.addAll( parts, p );
Collections.reverse( parts );
for ( String part : parts ) {
- if ( part.toLowerCase().startsWith( "test" ) || part.toLowerCase().startsWith( "train" ) ||
- part.toLowerCase().startsWith( "dev" ) ) {
+ final String lowerPart = part.toLowerCase();
+ if ( lowerPart.startsWith( "test" ) || lowerPart.startsWith( "train" ) || lowerPart.startsWith( "dev" ) ) {
continue;
}
return part;
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java
index 7c01c0e..608e1e6 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java
@@ -18,7 +18,7 @@ abstract public class AbstractWindowedContext implements CleartkExtractor.Contex
protected int begin;
protected int end;
- private String name;
+ private final String name;
protected List<Annotation> _windowCovered = new ArrayList<>();
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipeline.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipeline.java
index 661d575..d17c019 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipeline.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipeline.java
@@ -20,18 +20,13 @@ package org.apache.ctakes.assertion.pipelines;
import java.io.IOException;
+import java.util.Collections;
import org.apache.ctakes.core.ae.SHARPKnowtatorXMLReader;
-import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
-import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.cc.FileTreeXmiWriter;
+import org.apache.ctakes.core.config.ConfigParameterConstants;
+import org.apache.ctakes.core.pipeline.PipelineBuilder;
import org.apache.uima.UIMAException;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.collection.CollectionReaderDescription;
-import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.CollectionReaderFactory;
-import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
-import org.apache.uima.fit.pipeline.SimplePipeline;
-import org.apache.uima.resource.metadata.TypeSystemDescription;
/**
*
@@ -45,47 +40,62 @@ public class GoldEntityAndAttributeReaderPipeline {
public static void main(String[] args) throws UIMAException, IOException {
- TypeSystemDescription typeSystemDescription =
- // use the uimafit method of finding available type system
- // descriptor via META-INF/org.apache.uima.fit/types.txt
- // (found in ctakes-type-system/src/main/resources)
- TypeSystemDescriptionFactory.createTypeSystemDescription();
-
- CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescription(
- FilesInDirectoryCollectionReader.class,
- typeSystemDescription,
- "InputDirectory",
-// "/Users/m081914/work/data/sharp/Seed_Corpus/Mayo/UMLS_CEM/ss1_batch10/Knowtator/text"
- "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator/text"
- //"/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/sandbox/batch02_mayo/text"
- );
-
-// AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createEngineDescription(
-// GoldEntityAndAttributeReader.class,
+// TypeSystemDescription typeSystemDescription =
+// // use the uimafit method of finding available type system
+// // descriptor via META-INF/org.apache.uima.fit/types.txt
+// // (found in ctakes-type-system/src/main/resources)
+// TypeSystemDescriptionFactory.createTypeSystemDescription();
+//
+// CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescription(
+// FilesInDirectoryCollectionReader.class,
// typeSystemDescription,
// "InputDirectory",
-//// "/Users/m081914/work/data/sharp/Seed_Corpus/Mayo/UMLS_CEM/ss1_batch10/Knowtator XML/");
-// "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/xml/");
-//// "/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/sandbox/batch02_mayo/knowtator/");
+//// "/Users/m081914/work/data/sharp/Seed_Corpus/Mayo/UMLS_CEM/ss1_batch10/Knowtator/text"
+// "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator/text"
+// //"/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/sandbox/batch02_mayo/text"
+// );
+//
+//// AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createEngineDescription(
+//// GoldEntityAndAttributeReader.class,
+//// typeSystemDescription,
+//// "InputDirectory",
+////// "/Users/m081914/work/data/sharp/Seed_Corpus/Mayo/UMLS_CEM/ss1_batch10/Knowtator XML/");
+//// "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/xml/");
+////// "/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/sandbox/batch02_mayo/knowtator/");
+//
+// AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createEngineDescription(
+// SHARPKnowtatorXMLReader.class,
+// typeSystemDescription,
+// "KnowtatorURI",
+// "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator_XML",
+// "TextURI",
+// "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator/text");
+//
+// AnalysisEngineDescription xWriter = AnalysisEngineFactory.createEngineDescription(
+// XmiWriterCasConsumerCtakes.class,
+// typeSystemDescription,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+//// "/Users/m081914/work/data/sharp/Seed_Corpus/Mayo/UMLS_CEM/ss1_batch10/Knowtator XMI/",
+// "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator_XMI/"
+// // "/work/medfacts/sharp/data/2012-10-09_full_data_set/batch02",
+//// "/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/sandbox/batch02_mayo/xmi",
+// );
+//
+// SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter);
- AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createEngineDescription(
- SHARPKnowtatorXMLReader.class,
- typeSystemDescription,
- "KnowtatorURI",
- "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator_XML",
- "TextURI",
- "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator/text");
-
- AnalysisEngineDescription xWriter = AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- typeSystemDescription,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
-// "/Users/m081914/work/data/sharp/Seed_Corpus/Mayo/UMLS_CEM/ss1_batch10/Knowtator XMI/",
- "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator_XMI/"
- // "/work/medfacts/sharp/data/2012-10-09_full_data_set/batch02",
-// "/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/sandbox/batch02_mayo/xmi",
- );
-
- SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter);
+ // This is much simpler than the dozen or so lines above.
+ new PipelineBuilder().set( ConfigParameterConstants.PARAM_INPUTDIR,
+ "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator/text" )
+ .readFiles()
+ .add( SHARPKnowtatorXMLReader.class,
+ Collections.emptyList(),
+ "KnowtatorURI",
+ "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator_XML",
+ "TextURI",
+ "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator/text" )
+ .set( ConfigParameterConstants.PARAM_OUTPUTDIR,
+ "/Users/m081914/work/sharpattr/ctakes/ctakes-assertion/sharp_data/one/Knowtator_XMI" )
+ .add( FileTreeXmiWriter.class )
+ .run();
}
}
diff --git a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
index 9c6270f..e5d890a 100644
--- a/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
+++ b/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
@@ -29,8 +29,9 @@ import org.apache.ctakes.assertion.cr.MiPACQKnowtatorXMLReader;
import org.apache.ctakes.assertion.cr.NegExCorpusReader;
import org.apache.ctakes.assertion.pipelines.SharpCorpusSplit.Subcorpus;
import org.apache.ctakes.core.ae.SHARPKnowtatorXMLReader;
-import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
-import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.cc.FileTreeXmiWriter;
+import org.apache.ctakes.core.config.ConfigParameterConstants;
+import org.apache.ctakes.core.cr.FileTreeReader;
import org.apache.log4j.Logger;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -90,14 +91,15 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
return;
}
- File batchDirectories[] = parentDirectory.listFiles(new FileFilter() {
+ File[] batchDirectories = parentDirectory.listFiles( new FileFilter() {
@Override
public boolean accept(File pathname) {
return pathname.isDirectory();
}
- });
-
+ } );
+
+ assert batchDirectories != null;
for (File currentBatchDirectory : batchDirectories)
{
@@ -143,9 +145,11 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
AggregateBuilder aggregate = new AggregateBuilder();
CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescription(
- FilesInDirectoryCollectionReader.class,
- typeSystemDescription,
- "InputDirectory",
+// FilesInDirectoryCollectionReader.class,
+// typeSystemDescription,
+// "InputDirectory",
+ FileTreeReader.class,
+ ConfigParameterConstants.PARAM_INPUTDIR,
textDirectory.toString()
);
@@ -161,9 +165,11 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
// write just the XMI version of what's in Knowtator UMLS_CEM
AnalysisEngineDescription xWriter = AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- typeSystemDescription,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// XmiWriterCasConsumerCtakes.class,
+// typeSystemDescription,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+ FileTreeXmiWriter.class,
+ ConfigParameterConstants.PARAM_OUTPUTDIR,
xmiDirectory.toString()
);
aggregate.add(xWriter);
@@ -194,9 +200,11 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
break;
}
AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- typeSystemDescription,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// XmiWriterCasConsumerCtakes.class,
+// typeSystemDescription,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+ FileTreeXmiWriter.class,
+ ConfigParameterConstants.PARAM_OUTPUTDIR,
subcorpusDirectory
);
aggregate.add(xWriter2);
@@ -240,7 +248,8 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
public boolean accept(File pathname) {
return pathname.isDirectory();
}});
- for(File batchDir : batches){
+ assert batches != null;
+ for(File batchDir : batches){
TypeSystemDescription typeSystemDescription =
// use the uimafit method of finding available type system
// descriptor via META-INF/org.apache.uima.fit/types.txt
@@ -251,10 +260,12 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
AggregateBuilder aggregate = new AggregateBuilder();
CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescription(
- FilesInDirectoryCollectionReader.class,
- typeSystemDescription,
- "InputDirectory",
- textDirectory.toString()
+// FilesInDirectoryCollectionReader.class,
+// typeSystemDescription,
+// "InputDirectory",
+ FileTreeReader.class,
+ ConfigParameterConstants.PARAM_INPUTDIR,
+ textDirectory.toString()
);
// read the UMLS_CEM data from Knowtator
@@ -293,9 +304,11 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
}
AnalysisEngineDescription xWriter = AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- typeSystemDescription,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// XmiWriterCasConsumerCtakes.class,
+// typeSystemDescription,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+ FileTreeXmiWriter.class,
+ ConfigParameterConstants.PARAM_OUTPUTDIR,
subcorpusDir
);
aggregate.add(xWriter);
@@ -331,9 +344,11 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
if (preprocessedDirectory!=null) {
AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- typeSystemDescription,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// XmiWriterCasConsumerCtakes.class,
+// typeSystemDescription,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+ FileTreeXmiWriter.class,
+ ConfigParameterConstants.PARAM_OUTPUTDIR,
preprocessedDirectory
);
aggregate.add(xWriter2);
@@ -366,9 +381,11 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
if (preprocessedDirectory!=null) {
AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- typeSystemDescription,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// XmiWriterCasConsumerCtakes.class,
+// typeSystemDescription,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+ FileTreeXmiWriter.class,
+ ConfigParameterConstants.PARAM_OUTPUTDIR,
preprocessedDirectory
);
aggregate.add(xWriter2);
@@ -393,9 +410,11 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
AggregateBuilder aggregate = new AggregateBuilder();
CollectionReaderDescription collectionReader = CollectionReaderFactory.createReaderDescription(
- FilesInDirectoryCollectionReader.class,
- typeSystemDescription,
- "InputDirectory",
+// FilesInDirectoryCollectionReader.class,
+// typeSystemDescription,
+// "InputDirectory",
+ FileTreeReader.class,
+ ConfigParameterConstants.PARAM_INPUTDIR,
inDir
);
@@ -416,9 +435,11 @@ public class GoldEntityAndAttributeReaderPipelineForSeedCorpus {
if (preprocessedDirectory!=null) {
AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createEngineDescription(
- XmiWriterCasConsumerCtakes.class,
- typeSystemDescription,
- XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+// XmiWriterCasConsumerCtakes.class,
+// typeSystemDescription,
+// XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR,
+ FileTreeXmiWriter.class,
+ ConfigParameterConstants.PARAM_OUTPUTDIR,
splitMipacq.get(inDir)
);
aggregate.add(xWriter2);
diff --git a/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineWithUmls.java b/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineWithUmls.java
index 989a8ba..c307c5b 100644
--- a/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineWithUmls.java
+++ b/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineWithUmls.java
@@ -20,16 +20,16 @@ package org.apache.ctakes.clinicalpipeline;
import org.apache.ctakes.assertion.util.AssertionConst;
+import org.apache.ctakes.core.cc.FileTreeXmiWriter;
import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
-import org.apache.uima.fit.util.CasIOUtil;
import org.apache.uima.jcas.JCas;
import org.cleartk.util.cr.FilesCollectionReader;
-import java.io.File;
+import java.io.*;
import java.util.Date;
/**
@@ -62,8 +62,10 @@ public class ClinicalPipelineWithUmls {
AnalysisEngineDescription pipelineIncludingUmlsDictionaries = AnalysisEngineFactory.createEngineDescription(
"desc/analysis_engine/AggregatePlaintextUMLSProcessor");
+ final FileTreeXmiWriter xmiWriter = new FileTreeXmiWriter();
for(JCas jCas : SimplePipeline.iteratePipeline(collectionReader, pipelineIncludingUmlsDictionaries)){
- CasIOUtil.writeXmi( jCas, new File( AssertionConst.evalOutputDir, DocIdUtil.getDocumentID( jCas ) + ".xmi" ) );
+ final String docId = DocIdUtil.getDocumentID( jCas );
+ xmiWriter.writeFile( jCas, AssertionConst.evalOutputDir, docId, docId );
}
System.out.println("Done at " + new Date());
diff --git a/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java b/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
index 307a08c..dc3807a 100644
--- a/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
+++ b/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
@@ -57,7 +57,7 @@ final public class FileTreeReader extends AbstractFileTreeReader {
* @return text in file
* @throws IOException if the file could not be read
*/
- private String readFile( final File file ) throws IOException {
+ public String readFile( final File file ) throws IOException {
LOGGER.info( "Reading " + file.getPath() + " ..." );
if ( !isKeepCrChar() ) {
try {
diff --git a/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/EventCoreferenceAnnotator.java b/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/EventCoreferenceAnnotator.java
index 898f4e9..a3cf68b 100644
--- a/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/EventCoreferenceAnnotator.java
+++ b/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/EventCoreferenceAnnotator.java
@@ -142,7 +142,8 @@ public class EventCoreferenceAnnotator extends RelationExtractorAnnotator {
}
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> featureExtractorList = new ArrayList<>();
// pick and choose from base class:
diff --git a/ctakes-dictionary-lookup-fast/src/user/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/DictionarySubPipe.piper b/ctakes-dictionary-lookup-fast/src/user/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/DictionarySubPipe.piper
index dca032d..871afdd 100644
--- a/ctakes-dictionary-lookup-fast/src/user/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/DictionarySubPipe.piper
+++ b/ctakes-dictionary-lookup-fast/src/user/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/DictionarySubPipe.piper
@@ -3,8 +3,6 @@
// path to the xml file containing information for dictionary lookup configuration.
cli LookupXml=l
// umls credentials
-cli umlsUser=user
-cli umlsPass=pass
cli umlsKey=key
// Default fast dictionary lookup
diff --git a/ctakes-dictionary-lookup-fast/src/user/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/TsDictionarySubPipe.piper b/ctakes-dictionary-lookup-fast/src/user/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/TsDictionarySubPipe.piper
index 55f5699..6f7cf50 100644
--- a/ctakes-dictionary-lookup-fast/src/user/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/TsDictionarySubPipe.piper
+++ b/ctakes-dictionary-lookup-fast/src/user/resources/org/apache/ctakes/dictionary/lookup/fast/pipeline/TsDictionarySubPipe.piper
@@ -3,8 +3,6 @@
// path to the xml file containing information for dictionary lookup configuration.
cli LookupXml=l
// umls credentials
-cli umlsUser=user
-cli umlsPass=pass
cli umlsKey=key
// Default fast dictionary lookup
diff --git a/ctakes-distribution/src/main/bin/README b/ctakes-distribution/src/main/bin/README
new file mode 100644
index 0000000..b6a85aa
--- /dev/null
+++ b/ctakes-distribution/src/main/bin/README
@@ -0,0 +1,12 @@
+
+These scripts are meant to be run from within a binary installation.
+
+These scripts will not work in the source project.
+
+To use these scripts, build a binary installation by running
+ mvn package
+and then unzip the
+ apache-ctakes-*-bin.zip
+or
+ apache-ctakes-*.tar.gz
+file from the ctakes-distribution/target/ directory into a directory for the installation.
\ No newline at end of file
diff --git a/ctakes-dockhand/src/main/java/org/apache/ctakes/dockhand/gui/feature/Option.java b/ctakes-dockhand/src/main/java/org/apache/ctakes/dockhand/gui/feature/Option.java
index a7450dd..515e866 100644
--- a/ctakes-dockhand/src/main/java/org/apache/ctakes/dockhand/gui/feature/Option.java
+++ b/ctakes-dockhand/src/main/java/org/apache/ctakes/dockhand/gui/feature/Option.java
@@ -61,8 +61,7 @@ public enum Option {
// "// path to the xml file containing information for dictionary lookup configuration.",
// "cli LookupXml=l",
"// umls credentials",
- "cli umlsUser=user",
- "cli umlsPass=pass",
+ "cli umlsKey=key",
"add DefaultJCasTermAnnotator" ),
// CUSTOM_ENTITY( "Custom Entities",
diff --git a/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java b/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java
index 01b81b3..e22ab05 100644
--- a/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java
+++ b/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java
@@ -24,7 +24,7 @@ import org.apache.ctakes.core.ae.SentenceDetector;
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
import org.apache.ctakes.core.config.ConfigParameterConstants;
-import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.cr.FileTreeReader;
import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
@@ -54,12 +54,13 @@ public class MultiThreadedPipeline {
public static void main(String[] args) throws ResourceInitializationException {
CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
- FilesInDirectoryCollectionReader.class,
+// FilesInDirectoryCollectionReader.class,
+ FileTreeReader.class,
ConfigParameterConstants.PARAM_INPUTDIR,
- "src/user/resources/org/apache/ctakes/examples/annotation/anafora_annotated/",
- FilesInDirectoryCollectionReader.PARAM_RECURSE,
- true);
-
+ "src/user/resources/org/apache/ctakes/examples/annotation/anafora_annotated/" );
+// FilesInDirectoryCollectionReader.PARAM_RECURSE,
+// true);
+
AnalysisEngineDescription aed = getThreadsafePipeline();
CpeBuilder cpeBuilder = new CpeBuilder();
try{
diff --git a/ctakes-examples/src/user/resources/org/apache/ctakes/examples/pipeline/BigPipeline.piper b/ctakes-examples/src/user/resources/org/apache/ctakes/examples/pipeline/BigPipeline.piper
index 775999d..856cd14 100644
--- a/ctakes-examples/src/user/resources/org/apache/ctakes/examples/pipeline/BigPipeline.piper
+++ b/ctakes-examples/src/user/resources/org/apache/ctakes/examples/pipeline/BigPipeline.piper
@@ -1,10 +1,18 @@
+/////////////////////////////////////////////////////////
+//
+// An example pipeline that does a lot of stuff.
+//
+/////////////////////////////////////////////////////////
+
+set WriteBanner=yes
+
// Advanced Tokenization: Regex sectionization, BIO Sentence Detector (lumper), Paragraphs, Lists
load FullTokenizerPipeline
// OR use the standard tokenizer pipeline:
//load DefaultTokenizerPipeline
-// Always need these ...
+// Refined tokens, Parts of Speech
add ContextDependentTokenizerAnnotator
add POSTagger
@@ -34,7 +42,12 @@ add pretty.html.HtmlTextWriter SubDirectory=html
add pretty.plaintext.PrettyTextWriterFit SubDirectory=text
// Table output, write to subdirectory
-add SemanticTableFileWriter SubDirectory=table
+add SemanticTableFileWriter SubDirectory=bsv_table
+add SemanticTableFileWriter SubDirectory=html_table TableType=HTML
-// XMI output
+// XMI output. Warning: these can be very large.
//writeXmis
+add FileTreeXmiWriter SubDirectory=xmi
+
+// Write some information about the run
+addLast org.apache.ctakes.core.util.log.FinishedLogger
\ No newline at end of file
diff --git a/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/SmoothToolTip.java b/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/SmoothToolTip.java
index 1f704f2..0d21a3d 100644
--- a/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/SmoothToolTip.java
+++ b/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/SmoothToolTip.java
@@ -5,6 +5,7 @@ import sun.swing.SwingUtilities2;
import javax.swing.*;
import javax.swing.plaf.ToolTipUI;
+import javax.swing.plaf.basic.BasicGraphicsUtils;
import javax.swing.plaf.basic.BasicToolTipUI;
import java.awt.*;
import java.awt.geom.RoundRectangle2D;
@@ -117,8 +118,10 @@ public class SmoothToolTip extends JToolTip {
size.width - (insets.left + insets.right) - 6,
size.height - (insets.top + insets.bottom), 10, 10 );
g.setFont( font );
- SwingUtilities2.drawString( comp, g, tipText, (int)paintTextR.getX(),
- (int)paintTextR.getY() + metrics.getAscent() );
+// SwingUtilities2.drawString( comp, g, tipText, (int)paintTextR.getX(),
+// (int)paintTextR.getY() + metrics.getAscent() );
+ BasicGraphicsUtils.drawString( g, tipText, '\0', (int)paintTextR.getX(),
+ (int)paintTextR.getY() + metrics.getAscent() );
transground = new Color( foreground.getRed(), foreground.getGreen(), foreground.getBlue(), 63 );
g.setColor( transground );
g.drawRoundRect( 0, 0, size.width - 1, size.height - 1, 10, 10 );
diff --git a/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryDownloader.java b/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryDownloader.java
index 365beae..758faee 100644
--- a/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryDownloader.java
+++ b/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryDownloader.java
@@ -100,8 +100,9 @@ final public class DictionaryDownloader {
Executors.newSingleThreadExecutor().execute( download );
} );
toolBar.addSeparator( new Dimension( 50, 0 ) );
- _helpButton = addButton( toolBar, "Apache cTAKES Help" );
- _helpButton.addActionListener( e -> SystemUtil.openWebPage( "https://ctakes.apache.org/" ) );
+ _helpButton = addButton( toolBar, "Apache cTAKES UMLS Key Help" );
+ _helpButton.addActionListener(
+ e -> SystemUtil.openWebPage( "https://cwiki.apache.org/confluence/display/CTAKES/cTAKES+4.0.0.1" ) );
toolBar.addSeparator( new Dimension( 10, 0 ) );
return toolBar;
}
diff --git a/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgBaseTokenAnnotator.java b/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgBaseTokenAnnotator.java
index 18e1a00..4685839 100644
--- a/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgBaseTokenAnnotator.java
+++ b/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgBaseTokenAnnotator.java
@@ -34,15 +34,7 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-import java.util.StringTokenizer;
-import java.util.Vector;
+import java.util.*;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
@@ -52,6 +44,7 @@ import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationExceptio
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.cas.FSList;
+import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
/**
@@ -94,9 +87,7 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
public static final String PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF = "LemmaCacheFrequencyCutoff";
// LOG4J logger based on class name
- private Logger logger = Logger.getLogger(getClass().getName());
-
- private final String LVGCMDAPI_RESRC_KEY = "LvgCmdApi";
+ private final Logger logger = Logger.getLogger( getClass().getName() );
private LvgCmdApi lvgCmd;
@@ -106,13 +97,13 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
private boolean useSegments;
- private Set skipSegmentsSet;
+ private Set<String> skipSegmentsSet;
private boolean useCmdCache;
private String cmdCacheFileLocation;
private int cmdCacheFreqCutoff;
- private Map xeroxTreebankMap;
+ private Map<String,String> xeroxTreebankMap;
private boolean postLemmas;
private boolean useLemmaCache;
@@ -120,18 +111,18 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
private int lemmaCacheFreqCutoff;
// key = word, value = canonical word
- private Map normCacheMap;
+ private Map<String,String> normCacheMap;
// key = word, value = Set of Lemma objects
- private Map lemmaCacheMap;
+ private Map<String,Collection<LemmaLocalClass>> lemmaCacheMap;
- private Set exclusionSet;
+ private Set<String> exclusionSet;
/**
* Performs initialization logic. This implementation just reads values for
* the configuration parameters.
*
- * @see org.apache.uima.analysis_engine.annotator.BaseAnnotator#initialize(AnnotatorContext)
+// * @see org.apache.uima.analysis_engine.annotator.BaseAnnotator#initialize(AnnotatorContext)
*/
public void initialize(UimaContext aContext)
throws ResourceInitializationException {
@@ -141,13 +132,14 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
configInit();
try {
+ String LVGCMDAPI_RESRC_KEY = "LvgCmdApi";
LvgCmdApiResource lvgResource = (LvgCmdApiResource) context
- .getResourceObject(LVGCMDAPI_RESRC_KEY);
+ .getResourceObject( LVGCMDAPI_RESRC_KEY );
if (lvgResource == null)
throw new AnnotatorInitializationException(new Exception(
"Unable to locate resource with key="
- + LVGCMDAPI_RESRC_KEY + "."));
+ + LVGCMDAPI_RESRC_KEY + "."));
lvgCmd = lvgResource.getLvg();
@@ -175,52 +167,42 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
* Sets configuration parameters with values from the descriptor.
*/
private void configInit() throws ResourceInitializationException {
- useSegments = ((Boolean) context.getConfigParameterValue("UseSegments"))
- .booleanValue();
+ useSegments = (Boolean) context.getConfigParameterValue( "UseSegments" );
String[] skipSegmentIDs = (String[]) context
.getConfigParameterValue("SegmentsToSkip");
- skipSegmentsSet = new HashSet();
- for (int i = 0; i < skipSegmentIDs.length; i++) {
- skipSegmentsSet.add(skipSegmentIDs[i]);
- }
+ skipSegmentsSet = new HashSet<>();
+ skipSegmentsSet.addAll( Arrays.asList( skipSegmentIDs ) );
// Load Xerox Treebank tagset map
- String xtMaps[] = (String[]) context
- .getConfigParameterValue("XeroxTreebankMap");
- xeroxTreebankMap = new HashMap();
- for (int i = 0; i < xtMaps.length; i++) {
- StringTokenizer tokenizer = new StringTokenizer(xtMaps[i], "|");
- if (tokenizer.countTokens() == 2) {
+ String[] xtMaps = (String[]) context.getConfigParameterValue("XeroxTreebankMap");
+ xeroxTreebankMap = new HashMap<>();
+ for ( final String map : xtMaps ) {
+ StringTokenizer tokenizer = new StringTokenizer( map, "|" );
+ if ( tokenizer.countTokens() == 2 ) {
String xTag = tokenizer.nextToken();
String tTag = tokenizer.nextToken();
- xeroxTreebankMap.put(xTag, tTag);
+ xeroxTreebankMap.put( xTag, tTag );
}
}
- useCmdCache = ((Boolean) context.getConfigParameterValue("UseCmdCache"))
- .booleanValue();
+ useCmdCache = (Boolean) context.getConfigParameterValue( "UseCmdCache" );
- cmdCacheFileLocation = (String) context
- .getConfigParameterValue("CmdCacheFileLocation");
+ cmdCacheFileLocation = (String) context.getConfigParameterValue("CmdCacheFileLocation");
- cmdCacheFreqCutoff = ((Integer) context
- .getConfigParameterValue("CmdCacheFrequencyCutoff")).intValue();
+ cmdCacheFreqCutoff = (Integer) context.getConfigParameterValue( "CmdCacheFrequencyCutoff" );
String[] wordsToExclude = (String[]) context
.getConfigParameterValue("ExclusionSet");
- exclusionSet = new HashSet();
- for (int i = 0; i < wordsToExclude.length; i++) {
- exclusionSet.add(wordsToExclude[i]);
- }
+ exclusionSet = new HashSet<>();
+ exclusionSet.addAll( Arrays.asList( wordsToExclude ) );
Boolean bPostLemmas = (Boolean) context
.getConfigParameterValue(PARAM_POST_LEMMAS);
- postLemmas = bPostLemmas == null ? false : bPostLemmas.booleanValue();
+ postLemmas = bPostLemmas != null && bPostLemmas;
if (postLemmas) {
Boolean useLemmaCache = (Boolean) context
.getConfigParameterValue(PARAM_USE_LEMMA_CACHE);
- useLemmaCache = useLemmaCache == null ? false : useLemmaCache
- .booleanValue();
+ useLemmaCache = useLemmaCache != null && useLemmaCache;
if (useLemmaCache) {
lemmaCacheFileLocation = (String) context
.getConfigParameterValue(PARAM_LEMMA_CACHE_FILE_LOCATION);
@@ -228,12 +210,10 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
throw new ResourceInitializationException(new Exception(
"Parameter for " + PARAM_LEMMA_CACHE_FILE_LOCATION
+ " was not set."));
- Integer lemmaCacheFreqCutoff = (Integer) context
- .getConfigParameterValue(PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF);
- if (lemmaCacheFreqCutoff == null)
- lemmaCacheFreqCutoff = 20;
- else
- lemmaCacheFreqCutoff = lemmaCacheFreqCutoff.intValue();
+// Integer lemmaCacheFreqCutoff = (Integer) context
+// .getConfigParameterValue(PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF);
+// if (lemmaCacheFreqCutoff == null)
+// lemmaCacheFreqCutoff = 20;
}
}
}
@@ -251,16 +231,14 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
try {
if (useSegments) {
JFSIndexRepository indexes = jcas.getJFSIndexRepository();
- Iterator segmentItr = indexes.getAnnotationIndex(Segment.type)
- .iterator();
- while (segmentItr.hasNext()) {
- Segment segmentAnnotation = (Segment) segmentItr.next();
+ for ( final Annotation annotation : indexes.getAnnotationIndex( Segment.type ) ) {
+ Segment segmentAnnotation = (Segment) annotation;
String segmentID = segmentAnnotation.getId();
- if (!skipSegmentsSet.contains(segmentID)) {
+ if ( !skipSegmentsSet.contains( segmentID ) ) {
int start = segmentAnnotation.getBegin();
int end = segmentAnnotation.getEnd();
- annotateRange(jcas, text, start, end);
+ annotateRange( jcas, text, start, end );
}
}
} else {
@@ -279,22 +257,20 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
protected void annotateRange(JCas jcas, String text, int rangeBegin,
int rangeEnd) throws AnalysisEngineProcessException {
JFSIndexRepository indexes = jcas.getJFSIndexRepository();
- Iterator tokenItr = indexes.getAnnotationIndex(BaseToken.type)
- .iterator();
- while (tokenItr.hasNext()) {
- BaseToken tokenAnnotation = (BaseToken) tokenItr.next();
- if (tokenAnnotation.getBegin() >= rangeBegin
- && tokenAnnotation.getEnd() <= rangeEnd) {
- String token = text.substring(tokenAnnotation.getBegin(),
- tokenAnnotation.getEnd());
+ for ( final Annotation annotation : indexes.getAnnotationIndex( BaseToken.type ) ) {
+ BaseToken tokenAnnotation = (BaseToken) annotation;
+ if ( tokenAnnotation.getBegin() >= rangeBegin
+ && tokenAnnotation.getEnd() <= rangeEnd ) {
+ String token = text.substring( tokenAnnotation.getBegin(),
+ tokenAnnotation.getEnd() );
// skip past words that are part of the exclusion set
- if (exclusionSet.contains(token))
+ if ( exclusionSet.contains( token ) )
continue;
- setNormalizedForm(tokenAnnotation, token);
- if (postLemmas)
- setLemma(tokenAnnotation, token, jcas);
+ setNormalizedForm( tokenAnnotation, token );
+ if ( postLemmas )
+ setLemma( tokenAnnotation, token, jcas );
}
}
}
@@ -304,10 +280,10 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
// apply LVG processing to get canonical form
String normalizedForm = null;
if (useCmdCache) {
- normalizedForm = (String) normCacheMap.get(token);
- if (normalizedForm == null) {
+ normalizedForm = normCacheMap.get(token);
+// if (normalizedForm == null) {
// logger.info("["+ word+ "] was not found in LVG norm cache.");
- }
+// }
}
// only apply LVG processing if not found in cache first
@@ -336,49 +312,44 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
throws AnalysisEngineProcessException {
// apply LVG processing to get lemmas
// key = lemma string, value = Set of POS tags
- Map lemmaMap = null;
+ Map<String,Collection<String>> lemmaMap = null;
if (useLemmaCache) {
- Set lemmaSet = (Set) lemmaCacheMap.get(word);
+ Collection<LemmaLocalClass> lemmaSet = lemmaCacheMap.get(word);
if (lemmaSet == null) {
// logger.info("["+ word+
// "] was not found in LVG lemma cache.");
} else {
- lemmaMap = new HashMap();
- Iterator lemmaItr = lemmaSet.iterator();
- while (lemmaItr.hasNext()) {
- LemmaLocalClass l = (LemmaLocalClass) lemmaItr.next();
- lemmaMap.put(l.word, l.posSet);
+ lemmaMap = new HashMap<>();
+ for ( final LemmaLocalClass l : lemmaSet ) {
+ lemmaMap.put( l.word, l.posSet );
}
}
}
if (lemmaMap == null) {
- lemmaMap = new HashMap();
+ lemmaMap = new HashMap<>();
try {
- Vector lexItems = lvgLexItem.MutateLexItem(word);
- Iterator lexItemItr = lexItems.iterator();
- while (lexItemItr.hasNext()) {
- LexItem li = (LexItem) lexItemItr.next();
-
- Category c = li.GetTargetCategory();
- String lemmaStr = li.GetTargetTerm();
- long[] bitValues = Category.ToValuesArray(c.GetValue());
- for (int i = 0; i < bitValues.length; i++) {
+ Vector<LexItem> lexItems = lvgLexItem.MutateLexItem( word );
+ for ( final LexItem item : lexItems ) {
+ Category c = item.GetTargetCategory();
+ String lemmaStr = item.GetTargetTerm();
+ long[] bitValues = Category.ToValuesArray( c.GetValue() );
+ for ( final long value : bitValues ) {
// note that POS is Xerox tagset
- String lemmaPos = Category.ToName(bitValues[i]);
+ String lemmaPos = Category.ToName( value );
// convert Xerox tagset to PennTreebank tagset
- String treebankTag = (String) xeroxTreebankMap
- .get(lemmaPos);
- if (treebankTag != null) {
- Set posSet = null;
- if (lemmaMap.containsKey(lemmaStr)) {
- posSet = (Set) lemmaMap.get(lemmaStr);
+ String treebankTag = xeroxTreebankMap
+ .get( lemmaPos );
+ if ( treebankTag != null ) {
+ Collection<String> posSet = null;
+ if ( lemmaMap.containsKey( lemmaStr ) ) {
+ posSet = lemmaMap.get( lemmaStr );
} else {
- posSet = new HashSet();
+ posSet = new HashSet<>();
}
- posSet.add(treebankTag);
- lemmaMap.put(lemmaStr, posSet);
+ posSet.add( treebankTag );
+ lemmaMap.put( lemmaStr, posSet );
}
}
}
@@ -389,22 +360,18 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
// add lemma information to CAS
// FSArray lemmas = new FSArray(jcas, lemmaMap.keySet().size());
- Collection lemmas = new ArrayList(lemmaMap.keySet().size());
-
- Iterator lemmaStrItr = lemmaMap.keySet().iterator();
- while (lemmaStrItr.hasNext()) {
- String form = (String) lemmaStrItr.next();
- Set posTagSet = (Set) lemmaMap.get(form);
- Iterator posTagItr = posTagSet.iterator();
- while (posTagItr.hasNext()) {
- String pos = (String) posTagItr.next(); // part of speech
- Lemma lemma = new Lemma(jcas);
- lemma.setKey(form);
- lemma.setPosTag(pos);
- lemmas.add(lemma);
+ Collection<Lemma> lemmas = new ArrayList<>(lemmaMap.keySet().size());
+
+ for ( final String form : lemmaMap.keySet() ) {
+ Collection<String> posTagSet = lemmaMap.get( form );
+ for ( final String pos : posTagSet ) {
+ Lemma lemma = new Lemma( jcas );
+ lemma.setKey( form );
+ lemma.setPosTag( pos );
+ lemmas.add( lemma );
}
}
- Lemma[] lemmaArray = (Lemma[]) lemmas.toArray(new Lemma[lemmas.size()]);
+ Lemma[] lemmaArray = lemmas.toArray( new Lemma[ 0 ] );
FSList fsList = ListFactory.buildList(jcas, lemmaArray);
wordAnnotation.setLemmaEntries(fsList);
}
@@ -412,7 +379,7 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
/**
* Helper method that loads a Norm cache file.
*
- * @param location
+ * @param cpLocation -
*/
private void loadCmdCacheFile(String cpLocation)
throws FileNotFoundException, IOException {
@@ -423,7 +390,7 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
// initialize map
- normCacheMap = new HashMap();
+ normCacheMap = new HashMap<>();
String line = br.readLine();
while (line != null) {
@@ -452,7 +419,7 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
/**
* Helper method that loads a Lemma cache file.
*
- * @param location
+ * @param cpLocation -
*/
private void loadLemmaCacheFile(String cpLocation)
throws FileNotFoundException, IOException {
@@ -463,7 +430,7 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
// initialize map
- lemmaCacheMap = new HashMap();
+ lemmaCacheMap = new HashMap<>();
String line = br.readLine();
while (line != null) {
@@ -484,24 +451,24 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
// construct Lemma object
LemmaLocalClass l = new LemmaLocalClass();
l.word = lemmaWord;
- l.posSet = new HashSet();
+ l.posSet = new HashSet<>();
long bitVector = Category.ToValue(combinedCategories);
long[] bitValues = Category.ToValuesArray(bitVector);
- for (int i = 0; i < bitValues.length; i++) {
- String pos = Category.ToName(bitValues[i]);
+ for ( final long value : bitValues ) {
+ String pos = Category.ToName( value );
// convert Xerox tag into Treebank
- String treebankTag = (String) xeroxTreebankMap.get(pos);
- if (treebankTag != null) {
- l.posSet.add(treebankTag);
+ String treebankTag = xeroxTreebankMap.get( pos );
+ if ( treebankTag != null ) {
+ l.posSet.add( treebankTag );
}
}
// add Lemma to cache map
- Set lemmaSet = null;
+ Collection<LemmaLocalClass> lemmaSet = null;
if (!lemmaCacheMap.containsKey(origWord)) {
- lemmaSet = new HashSet();
+ lemmaSet = new HashSet<>();
} else {
- lemmaSet = (Set) lemmaCacheMap.get(origWord);
+ lemmaSet = lemmaCacheMap.get(origWord);
}
lemmaSet.add(l);
lemmaCacheMap.put(origWord, lemmaSet);
@@ -521,10 +488,10 @@ public class LvgBaseTokenAnnotator extends JCasAnnotator_ImplBase {
*
* @author Mayo Clinic
*/
- class LemmaLocalClass {
+ static class LemmaLocalClass {
public String word;
- public Set posSet;
+ public Collection<String> posSet;
}
}
\ No newline at end of file
diff --git a/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessor.java b/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessor.java
index 20edeb3..486966e 100644
--- a/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessor.java
+++ b/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessor.java
@@ -18,7 +18,6 @@
*/
package org.apache.ctakes.preprocessor;
-import java.io.File;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.StringReader;
@@ -46,7 +45,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
implements PreProcessor
{
// LOG4J logger based on class name
- private Logger iv_logger = Logger.getLogger(getClass().getName());
+ private final Logger iv_logger = Logger.getLogger( getClass().getName() );
// Jan 1, 1AM, 0001
// private final long DEFAULT_DATE_MILLIS = -62135571600l;
@@ -130,7 +129,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
private StringBuffer iv_examComponentText = null;
- private List iv_headerList = new ArrayList();
+ private List<String> iv_headerList = new ArrayList<>();
private XMLReader iv_xmlParser;
@@ -236,8 +235,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
}
else if (iv_insideSection && localName.equals("caption_cd"))
{
- if (iv_insideTable && iv_previousElement.equals("caption"))
- {
+// if (iv_insideTable && iv_previousElement.equals("caption")) {
// processing a table, trace what type of table this is
// String tableType = attributes.getValue("V");
@@ -245,7 +243,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
// iv_tableType = UNKNOWN_TABLE_TYPE;
// else
// iv_tableType = Integer.parseInt(tableType);
- }
+// }
if (iv_sectionIdentifier == null)
{
@@ -282,12 +280,8 @@ public class ClinicalNotePreProcessor extends DefaultHandler
iv_insideTableData = true;
compress(iv_sectionText);
iv_tdStartOffset = iv_text.length() + iv_sectionText.length();
- }
- else if (iv_insideTableHeader && localName.equals("activity_tmr"))
- {
- }
- else if (iv_insideTableData && localName.equals("coded_entry.value"))
- {
+// } else if (iv_insideTableHeader && localName.equals("activity_tmr")) {
+// } else if (iv_insideTableData && localName.equals("coded_entry.value")) {
}
else if (iv_insideTableData && localName.equals("value"))
{
@@ -299,24 +293,23 @@ public class ClinicalNotePreProcessor extends DefaultHandler
if (iv_sectionIdentifier.equals("20114"))
{
// processing for Administrative section
- if (iv_tableHeaderKeyID.equals("30004"))
- {
- // margin code
- iv_docMetaData.addMetaData(
- MD_KEY_PT_BILLING_CODE,
- value);
- }
- else if (iv_tableHeaderKeyID.equals("30005"))
- {
- // total time
- iv_docMetaData.addMetaData(MD_KEY_TOTAL_TIME, value);
- }
- else if (iv_tableHeaderKeyID.equals("30006"))
- {
- // counseling time
- iv_docMetaData.addMetaData(
- MD_KEY_MINUTES_COUNSELING,
- value);
+ switch ( iv_tableHeaderKeyID ) {
+ case "30004":
+ // margin code
+ iv_docMetaData.addMetaData(
+ MD_KEY_PT_BILLING_CODE,
+ value );
+ break;
+ case "30005":
+ // total time
+ iv_docMetaData.addMetaData( MD_KEY_TOTAL_TIME, value );
+ break;
+ case "30006":
+ // counseling time
+ iv_docMetaData.addMetaData(
+ MD_KEY_MINUTES_COUNSELING,
+ value );
+ break;
}
}
}
@@ -562,7 +555,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
{
if (localName.equals("id"))
{
- if (iv_foundProvider1 == false)
+ if ( !iv_foundProvider1 )
{
String providerID = attributes.getValue("EX");
iv_docMetaData.addMetaData(
@@ -597,7 +590,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
{
if (iv_isHospitalSummary)
{
- /**
+ /*
* Note: 07/23/07
* Formats that we need to handle: YYYYMMDDTHHMMSS, YYYYMMDD or YYYYMMDD<separator>YYYYMMDD
*/
@@ -817,7 +810,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
{
if (iv_tdCounter < iv_headerList.size())
{
- String thText = (String) iv_headerList.get(iv_tdCounter);
+ String thText = iv_headerList.get(iv_tdCounter);
Annotation a = new Annotation();
a.iv_type = thText;
a.startOffset = iv_tdStartOffset;
@@ -873,7 +866,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
if (iv_logger.isDebugEnabled())
{
- String docID = (String) iv_docMetaData.getMetaData().get(
+ String docID = iv_docMetaData.getMetaData().get(
MD_KEY_DOC_ID);
iv_logger.debug("Finished processing document id=" + docID);
}
@@ -903,35 +896,24 @@ public class ClinicalNotePreProcessor extends DefaultHandler
iv_sectionText.append(text);
}
- if (iv_insideTableData && (iv_tableHeaderKeyID != null))
- {
+// if (iv_insideTableData && (iv_tableHeaderKeyID != null)) {
// Do nothing
- }
+// }
// reset the buffer to zero to start accumulating afresh
iv_contiguousTextBuffer.setLength(0);
}
- private String getSectionStartMarker(String id)
- {
- StringBuffer sb = new StringBuffer();
- sb.append("[start section id=\"");
- sb.append(id);
- sb.append("\"]");
- return sb.toString();
+ private String getSectionStartMarker(String id) {
+ return "[start section id=\"" + id + "\"]";
}
- private String getSectionEndMarker(String id)
- {
- StringBuffer sb = new StringBuffer();
- sb.append("[end section id=\"");
- sb.append(id);
- sb.append("\"]");
- return sb.toString();
+ private String getSectionEndMarker(String id) {
+ return "[end section id=\"" + id + "\"]";
}
private String compress(StringBuffer sb)
{
- StringBuffer compressedSB = new StringBuffer();
+ StringBuilder compressedSB = new StringBuilder();
if (sb == null)
{
return compressedSB.toString();
@@ -970,7 +952,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
* Given a drm string that may or may not contain the time, this method will
* return a string.
*
- * @param drmStr
+ * @param drmStr -
* @return If something goes wrong, 0 is returned. Otherwise the time in
* milliseconds is returned.
*/
@@ -990,7 +972,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
if (tIndex != -1)
{
dateStr = drmStr.substring(0, tIndex);
- timeStr = drmStr.substring(tIndex + 1, drmStr.length());
+ timeStr = drmStr.substring(tIndex + 1 );
}
else
{
@@ -1043,7 +1025,7 @@ public class ClinicalNotePreProcessor extends DefaultHandler
/**
* Replaces any non-ascii characters with the specified char.
*
- * @param sb
+ * @param sb -
*/
private void replaceNonAsciiChars(StringBuffer sb, char replacementChar)
{
diff --git a/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/DocumentMetaData.java b/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/DocumentMetaData.java
index d79891a..5d2efea 100644
--- a/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/DocumentMetaData.java
+++ b/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/DocumentMetaData.java
@@ -32,38 +32,38 @@ public class DocumentMetaData
{
private String iv_text;
- private List iv_scAnnotationList = new ArrayList();
+ final private List<Annotation> iv_scAnnotationList = new ArrayList<>();
// key = unique key, value = document meta data
- private Map iv_docMetaDataMap = new HashMap();
+ final private Map<String,String> iv_docMetaDataMap = new HashMap<>();
// key = segment ID, value = SegmentMetaData object
- private Map<String, SegmentMetaData> iv_segMetaDataHash = new HashMap<String, SegmentMetaData>();
+ final private Map<String, SegmentMetaData> iv_segMetaDataHash = new HashMap<>();
/**
* Adds a meta data entry for the document.
- * @param key
- * @param value
+ * @param key -
+ * @param value -
*/
- public void addMetaData(Object key, Object value)
+ public void addMetaData(final String key, final String value)
{
iv_docMetaDataMap.put(key, value);
}
/**
* Gets a map of meta data about the document.
- * @return
+ * @return -
*/
- public Map getMetaData()
+ public Map<String,String> getMetaData()
{
return iv_docMetaDataMap;
}
/**
* Adds a segment.
- * @param smd
+ * @param smd -
*/
- public void addSegment(SegmentMetaData smd)
+ public void addSegment(final SegmentMetaData smd)
{
iv_segMetaDataHash.put(smd.id, smd);
}
@@ -74,7 +74,7 @@ public class DocumentMetaData
*/
public Set<String> getSegmentIdentifiers()
{
- return (Set<String>)iv_segMetaDataHash.keySet();
+ return iv_segMetaDataHash.keySet();
}
/**
@@ -83,25 +83,25 @@ public class DocumentMetaData
* @return SegmentMetaData object that contains meta data about the
* specified segment.
*/
- public SegmentMetaData getSegment(String segmentID)
+ public SegmentMetaData getSegment(final String segmentID)
{
- return (SegmentMetaData) iv_segMetaDataHash.get(segmentID);
+ return iv_segMetaDataHash.get(segmentID);
}
/**
* Adds a single annotation to the syntactic cue annotation list.
- * @param a
+ * @param a -
*/
- public void addAnnotation(Annotation a)
+ public void addAnnotation(final Annotation a)
{
iv_scAnnotationList.add(a);
}
/**
* Adds a list of annotations to the syntactic cue annotation list.
- * @param aList
+ * @param aList -
*/
- public void addAnnotations(List aList)
+ public void addAnnotations(final List<Annotation> aList)
{
iv_scAnnotationList.addAll(aList);
}
@@ -117,7 +117,7 @@ public class DocumentMetaData
}
/**
- * @return
+ * @return -
*/
public String getText()
{
@@ -125,9 +125,9 @@ public class DocumentMetaData
}
/**
- * @param string
+ * @param string -
*/
- public void setText(String string)
+ public void setText(final String string)
{
iv_text = string;
}
diff --git a/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java b/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
index 38ef5bc..fc018dd 100644
--- a/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
+++ b/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
@@ -37,6 +37,7 @@ import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.UimaContextAdmin;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -72,17 +73,26 @@ public abstract class RelationExtractorAnnotator extends CleartkAnnotator<String
protected Random coin = new Random(0);
- private List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> featureExtractors = this.getFeatureExtractors();
+private List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> featureExtractors;
private Class<? extends Annotation> coveringClass = getCoveringClass();
+ protected RelationExtractorAnnotator() {
+ try {
+ featureExtractors = getFeatureExtractors();
+ } catch ( ResourceInitializationException riE ) {
+ Logger.getLogger( "RelationExtractorAnnotator" ).error( riE.getMessage() );
+ }
+ }
+
/**
* Defines the list of feature extractors used by the classifier. Subclasses
* may override this method to provide a different set of feature extractors.
*
* @return The list of feature extractors to use.
*/
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
new TokenFeaturesExtractor(),
new PartOfSpeechFeaturesExtractor(),
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
index 5c34c1a..a7457a9 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
@@ -186,7 +186,7 @@ public class BackwardsTimeAnnotator extends TemporalEntityAnnotator_ImplBase {
} catch ( IOException ioE ) {
throw new ResourceInitializationException( ioE );
}
- LOGGER.info( "Finished." );
+// LOGGER.info( "Finished." );
}
@Override
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java
index a398fac..8e85a72 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java
@@ -29,10 +29,6 @@ import org.cleartk.ml.jar.GenericJarClassifierFactory;
import java.io.File;
import java.util.*;
-//import org.apache.ctakes.temporal.ae.feature.DeterminerRelationFeaturesExtractor;
-//import org.apache.ctakes.temporal.ae.feature.TemporalAttributeForMixEventTimeExtractor;
-//import org.apache.ctakes.typesystem.type.syntax.WordToken;
-//import org.apache.ctakes.typesystem.type.textspan.Paragraph;
@PipeBitInfo(
name = "E-E Consecutive Sentence TLinker",
@@ -71,7 +67,8 @@ public class ConsecutiveSentencesEventEventRelationAnnotator extends RelationExt
}
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
new UnexpandedTokenFeaturesExtractor() //use unexpanded version for i2b2 data
, new OverlappedHeadFeaturesExtractor()
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java
index a20766f..0b23641 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java
@@ -71,7 +71,8 @@ public class ConsecutiveSentencesEventTimeRelationAnnotator extends RelationExtr
}
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
new UnexpandedTokenFeaturesExtractor() //use unexpanded version for i2b2 data
// , new OverlappedHeadFeaturesExtractor()
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventCRFRelationAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventCRFRelationAnnotator.java
index d64c9c8..21bb65e 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventCRFRelationAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventCRFRelationAnnotator.java
@@ -21,7 +21,6 @@ package org.apache.ctakes.temporal.ae;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -31,48 +30,20 @@ import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
-import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
import org.apache.ctakes.temporal.ae.feature.CheckSpecialWordRelationExtractor;
-import org.apache.ctakes.temporal.ae.feature.ConjunctionRelationFeaturesExtractor;
-//import org.apache.ctakes.temporal.ae.feature.DependencyParseUtils;
import org.apache.ctakes.temporal.ae.feature.DependencyPathFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.CoordinateFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.DependingVerbsFeatureExtractor;
-import org.apache.ctakes.temporal.ae.feature.EmptyFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.MultiTokenFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.NoOtherETPuntInBetweenFeaturesExtractor;
-//import org.apache.ctakes.temporal.ae.feature.EventInBetweenPropertyExtractor;
-//import org.apache.ctakes.temporal.ae.feature.EventOutsidePropertyExtractor;
-import org.apache.ctakes.temporal.ae.feature.SpecialAnnotationRelationExtractor;
-import org.apache.ctakes.temporal.ae.feature.TemporalPETFlatExtractor;
-import org.apache.ctakes.temporal.ae.feature.TokenPropertyFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.DeterminerRelationFeaturesExtractor;
import org.apache.ctakes.temporal.ae.feature.EventArgumentPropertyExtractor;
-import org.apache.ctakes.temporal.ae.feature.EventTimeRelationFeatureExtractor;
-import org.apache.ctakes.temporal.ae.feature.EventPositionRelationFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.NumberOfEventsInTheSameSentenceExtractor;
-import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseRelationExtractor;
-import org.apache.ctakes.temporal.ae.feature.NumberOfEventTimeBetweenCandidatesExtractor;
import org.apache.ctakes.temporal.ae.feature.OverlappedHeadFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.SRLRelationFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.TimeXRelationFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.SectionHeaderRelationExtractor;
-//import org.apache.ctakes.temporal.ae.feature.TemporalAttributeFeatureExtractor;
import org.apache.ctakes.temporal.ae.feature.UmlsFeatureExtractor;
import org.apache.ctakes.temporal.ae.feature.UnexpandedTokenFeaturesExtractor;
-//import org.apache.ctakes.temporal.ae.feature.UnexpandedTokenFeaturesExtractor;
-//import org.apache.ctakes.temporal.ae.feature.treekernel.TemporalPETExtractor;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.syntax.WordToken;
-//import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
@@ -82,14 +53,9 @@ import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.CleartkAnnotator;
import org.cleartk.ml.CleartkSequenceAnnotator;
-import org.cleartk.ml.DataWriter;
import org.cleartk.ml.Feature;
-import org.cleartk.ml.Instance;
import org.cleartk.ml.Instances;
import org.cleartk.ml.crfsuite.CrfSuiteStringOutcomeDataWriter;
-import org.cleartk.ml.feature.extractor.CleartkExtractor;
-import org.cleartk.ml.feature.extractor.FeatureExtractor1;
-import org.cleartk.ml.jar.DefaultDataWriterFactory;
import org.cleartk.ml.jar.DefaultSequenceDataWriterFactory;
import org.cleartk.ml.jar.DirectoryDataWriterFactory;
import org.cleartk.ml.jar.GenericJarClassifierFactory;
@@ -149,6 +115,7 @@ public class EventEventCRFRelationAnnotator extends TemporalSequenceAnnotator_Im
public static boolean eventExpansion = false;
+ private List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> featureExtractors;
/**
* @deprecated use String path instead of File.
@@ -166,7 +133,17 @@ public class EventEventCRFRelationAnnotator extends TemporalSequenceAnnotator_Im
new File(modelDirectory, "model.jar"));
}
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected EventEventCRFRelationAnnotator() {
+ try {
+ featureExtractors = getFeatureExtractors();
+ } catch ( ResourceInitializationException riE ) {
+ Logger.getLogger( "EventEventCRFRelationAnnotator" ).error( riE.getMessage() );
+ }
+
+ }
+
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
new UnexpandedTokenFeaturesExtractor() //new TokenFeaturesExtractor()
, new PartOfSpeechFeaturesExtractor()
@@ -231,7 +208,7 @@ public class EventEventCRFRelationAnnotator extends TemporalSequenceAnnotator_Im
IdentifiedAnnotation arg2 = pair.getArg2();
// apply all the feature extractors to extract the list of features
List<Feature> features = new ArrayList<>();
- for (RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation> extractor : this.getFeatureExtractors()) {
+ for ( RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation> extractor : featureExtractors ) {
List<Feature> feats = extractor.extract(jCas, arg1, arg2);
if (feats != null) features.addAll(feats);
}
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventI2B2RelationAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventI2B2RelationAnnotator.java
index 18e41de..5b75b20 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventI2B2RelationAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventI2B2RelationAnnotator.java
@@ -132,7 +132,8 @@ public class EventEventI2B2RelationAnnotator extends RelationExtractorAnnotator
}
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
// new TokenFeaturesExtractor()
new UnexpandedTokenFeaturesExtractor() //use unexpanded version for i2b2 data
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java
index bf27103..70ad3a0 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java
@@ -144,7 +144,8 @@ public class EventEventRelationGoldContainerAnnotator extends RelationExtractorA
}
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
new UnexpandedTokenFeaturesExtractor()
// new TokenFeaturesExtractor()
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationSeedBasedAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationSeedBasedAnnotator.java
index 837a866..91dcf90 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationSeedBasedAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationSeedBasedAnnotator.java
@@ -145,7 +145,8 @@ public class EventEventRelationSeedBasedAnnotator extends RelationExtractorAnnot
}
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
new UnexpandedTokenFeaturesExtractor() //new TokenFeaturesExtractor()
// new EmptyFeaturesExtractor()
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeI2B2RelationAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeI2B2RelationAnnotator.java
index 27c7612..c0a4e7e 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeI2B2RelationAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeI2B2RelationAnnotator.java
@@ -126,7 +126,8 @@ public class EventTimeI2B2RelationAnnotator extends RelationExtractorAnnotator {
}
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
// new TokenFeaturesExtractor()
new UnexpandedTokenFeaturesExtractor() //use unexpanded version for i2b2 data
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
index 77f199b..9ba2f6c 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
@@ -127,11 +127,12 @@ public class EventTimeRelationAnnotator extends RelationExtractorAnnotator {
} catch ( IOException ioE ) {
throw new ResourceInitializationException( ioE );
}
- LOGGER.info( "Finished." );
+// LOGGER.info( "Finished." );
}
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
new TokenFeaturesExtractor()
// new UnexpandedTokenFeaturesExtractor() //use unexpanded version for i2b2 data
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java
index b6dc711..4fa534f 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java
@@ -138,7 +138,8 @@ public class EventTimeSelfRelationAnnotator extends TemporalRelationExtractorAnn
private RelationSyntacticETEmbeddingFeatureExtractor embedingExtractor;
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
final String vectorFile = "org/apache/ctakes/temporal/gloveresult_3";
try {
this.embedingExtractor = new RelationSyntacticETEmbeddingFeatureExtractor(vectorFile);
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java
index 4409699..0928713 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java
@@ -27,6 +27,7 @@ import org.apache.ctakes.relationextractor.ae.features.*;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.UimaContextAdmin;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -71,7 +72,7 @@ public abstract class TemporalRelationExtractorAnnotator extends CleartkAnnotato
protected Random coin = new Random(0);
- private List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> featureExtractors = this.getFeatureExtractors();
+ private List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> featureExtractors;
private Class<? extends Annotation> coveringClass = getCoveringClass();
@@ -92,13 +93,23 @@ public abstract class TemporalRelationExtractorAnnotator extends CleartkAnnotato
}
*/
+
+ protected TemporalRelationExtractorAnnotator() {
+ try {
+ featureExtractors = getFeatureExtractors();
+ } catch ( ResourceInitializationException riE ) {
+ Logger.getLogger( "TemporalRelationExtractorAnnotator" ).error( riE.getMessage() );
+ }
+ }
+
/**
* Defines the list of feature extractors used by the classifier. Subclasses
* may override this method to provide a different set of feature extractors.
*
* @return The list of feature extractors to use.
*/
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return Lists.newArrayList(
new TokenFeaturesExtractor(),
new PartOfSpeechFeaturesExtractor(),
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/TreeHeightBaseline.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/TreeHeightBaseline.java
index 7efce4f..8ca63b5 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/TreeHeightBaseline.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/TreeHeightBaseline.java
@@ -62,7 +62,8 @@ public class TreeHeightBaseline extends RelationExtractorAnnotator {
}
@Override
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors()
+ throws ResourceInitializationException {
return new ArrayList<>();
}
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CheckSpecialWordRelationExtractor.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CheckSpecialWordRelationExtractor.java
index 7daa9d3..0edb18d 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CheckSpecialWordRelationExtractor.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CheckSpecialWordRelationExtractor.java
@@ -18,47 +18,55 @@
*/
package org.apache.ctakes.temporal.ae.feature;
+import java.io.BufferedReader;
import java.io.IOException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
+import java.io.InputStreamReader;
+import java.util.*;
+import java.util.regex.Pattern;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.util.StringUtil;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.Feature;
-import org.cleartk.timeml.util.TimeWordsExtractor;
import org.springframework.util.StringUtils;
-import com.google.common.base.Charsets;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
-import com.google.common.io.Resources;
public class CheckSpecialWordRelationExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>{
//final static List<String> specialWd = Arrays.asList("before","prior","previous","previously","ago","soon","earlier","early","after","later","subsequent","follow","following","followed","post","since","back","start","started","by","past","starting");
- private static final String LOOKUP_PATH = "/org/apache/ctakes/temporal/TimeLexicon.csv";
-
- private Multimap<String, String> specialWd;
-
- public CheckSpecialWordRelationExtractor() {
- this.specialWd = ArrayListMultimap.create();
- URL url = TimeWordsExtractor.class.getResource(LOOKUP_PATH);
- try {
- for (String line : Resources.readLines(url, Charsets.US_ASCII)) {
- String[] WordAndType = line.split(",");
- if (WordAndType.length != 2) {
- throw new IllegalArgumentException("Expected '<word>,<type>', found: " + line);
- }
- this.specialWd.put(WordAndType[0], WordAndType[1]);
- }
- } catch (IOException e) {
- System.err.println("TimeLexicon resource initialization error.");
+ private static final String LOOKUP_PATH = "org/apache/ctakes/temporal/TimeLexicon.csv";
+ static private final Pattern EOL_PATTERN = Pattern.compile( "[\r\n]" );
+
+// private Multimap<String, String> specialWd;
+ private final Map<String, Collection<String>> _specialWd;
+ public CheckSpecialWordRelationExtractor() throws ResourceInitializationException {
+ // TODO Use a plain old java HashMap<String,Collection<String>>.
+// this.specialWd = ArrayListMultimap.create();
+ _specialWd = new HashMap<>();
+ try ( final BufferedReader reader
+ = new BufferedReader( new InputStreamReader( FileLocator.getAsStream( LOOKUP_PATH ) ) ) ) {
+ reader.lines().forEachOrdered( this::loadWordTypes );
+ } catch ( IOException | IllegalArgumentException multE ) {
+ throw new ResourceInitializationException( multE );
+ }
+ }
+
+ private void loadWordTypes( final String line ) throws IllegalArgumentException {
+ final String[] wordAndType = StringUtil.fastSplit( line, ',' );
+ if (wordAndType.length != 2) {
+ throw new IllegalArgumentException("Expected '<word>,<type>', found: " + line);
}
+// specialWd.put(wordAndType[0], wordAndType[1]);
+ _specialWd.computeIfAbsent( wordAndType[0], t -> new HashSet<>() ).add( wordAndType[1] );
}
+
@Override
public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1,
IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
@@ -99,7 +107,12 @@ public class CheckSpecialWordRelationExtractor implements RelationFeaturesExtrac
// String textAfterArg1 = null;
// String textBeforeArg2 = null;
// if(end-begin <= 2* window){
- textInBetween = jcas.getDocumentText().substring(begin, end).replaceAll("[\r\n]", " ").toLowerCase();
+ textInBetween = jcas.getDocumentText()
+ .substring(begin, end)
+// .replaceAll("[\r\n]", " ")
+ .toLowerCase();
+ textInBetween = EOL_PATTERN.matcher( textInBetween ).replaceAll( " " );
+
// }else{
// int arg1tail = Math.min(begin + window, arg1Sent.getEnd());
// textAfterArg1 = jcas.getDocumentText().substring(begin, arg1tail).replaceAll("[\r\n]", " ").toLowerCase();
@@ -110,13 +123,61 @@ public class CheckSpecialWordRelationExtractor implements RelationFeaturesExtrac
// String textBeforeArg1 = jcas.getDocumentText().substring(arg1head, arg1.getBegin()).replaceAll("[\r\n]", " ").toLowerCase();
// int arg2tail = Math.min(arg2.getEnd()+window, arg2Sent.getEnd());
// String textAfterArg2 = jcas.getDocumentText().substring(arg2.getEnd(), arg2tail).replaceAll("[\r\n]", " ").toLowerCase();
- String textInArg1 = jcas.getDocumentText().substring(arg1.getBegin(), arg1.getEnd()).replaceAll("[\r\n]", " ").toLowerCase();
- String textInArg2 = jcas.getDocumentText().substring(arg2.getBegin(), arg2.getEnd()).replaceAll("[\r\n]", " ").toLowerCase();
-
- for(String lexicon : specialWd.keySet()){
- if( textInBetween != null && textInBetween.matches(".*\\b"+lexicon+"\\b.*")){
- String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
- Feature feature = new Feature("SpecialWd_InBetween", type);
+ String textInArg1 = jcas.getDocumentText()
+ .substring(arg1.getBegin(), arg1.getEnd())
+// .replaceAll("[\r\n]", " ")
+ .toLowerCase();
+ textInArg1 = EOL_PATTERN.matcher( textInArg1 ).replaceAll( " " );
+ String textInArg2 = jcas.getDocumentText()
+ .substring(arg2.getBegin(), arg2.getEnd())
+// .replaceAll("[\r\n]", " ")
+ .toLowerCase();
+ textInArg2 = EOL_PATTERN.matcher( textInArg2 ).replaceAll( " " );
+
+
+// for(String lexicon : specialWd.keySet()){
+// if( textInBetween != null && textInBetween.matches(".*\\b"+lexicon+"\\b.*")){
+// String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
+// Feature feature = new Feature("SpecialWd_InBetween", type);
+// feats.add(feature);
+// }
+//// if( textBeforeArg1.matches(".*\\b"+lexicon+"\\b.*")){
+//// String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
+//// Feature feature = new Feature("SpecialWd_BeforeArg1", type);
+//// feats.add(feature);
+//// }
+// if( textInArg1.matches(".*\\b"+lexicon+"\\b.*")){
+// String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
+// Feature feature = new Feature("SpecialWd_InArg1", type);
+// feats.add(feature);
+// }
+// // if( textAfterArg1 != null && textAfterArg1.matches(".*\\b"+lexicon+"\\b.*")){
+// // String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
+// // Feature feature = new Feature("SpecialWd_AfterArg1", type);
+// // feats.add(feature);
+// // }
+// // if( textBeforeArg2 != null && textBeforeArg2.matches(".*\\b"+lexicon+"\\b.*")){
+// // String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
+// // Feature feature = new Feature("SpecialWd_BeforeArg2", type);
+// // feats.add(feature);
+// // }
+// if( textInArg2.matches(".*\\b"+lexicon+"\\b.*")){
+// String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
+// Feature feature = new Feature("SpecialWd_InArg2", type);
+// feats.add(feature);
+// }
+//// if( textAfterArg2.matches(".*\\b"+lexicon+"\\b.*")){
+//// String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
+//// Feature feature = new Feature("SpecialWd_AfterArg2", type);
+//// feats.add(feature);
+//// }
+// }
+
+ for( Map.Entry<String,Collection<String>> lexiconType : _specialWd.entrySet()){
+ final Pattern lexiconPattern = Pattern.compile( ".*\\b"+lexiconType.getKey()+"\\b.*" );
+ if( textInBetween != null && lexiconPattern.matcher( textInBetween ).matches() ){
+ final String type = String.join( ",", lexiconType.getValue() );
+ final Feature feature = new Feature("SpecialWd_InBetween", type);
feats.add(feature);
}
// if( textBeforeArg1.matches(".*\\b"+lexicon+"\\b.*")){
@@ -124,9 +185,9 @@ public class CheckSpecialWordRelationExtractor implements RelationFeaturesExtrac
// Feature feature = new Feature("SpecialWd_BeforeArg1", type);
// feats.add(feature);
// }
- if( textInArg1.matches(".*\\b"+lexicon+"\\b.*")){
- String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
- Feature feature = new Feature("SpecialWd_InArg1", type);
+ if( lexiconPattern.matcher( textInArg1 ).matches() ){
+ final String type = String.join( ",", lexiconType.getValue() );
+ final Feature feature = new Feature("SpecialWd_InArg1", type);
feats.add(feature);
}
// if( textAfterArg1 != null && textAfterArg1.matches(".*\\b"+lexicon+"\\b.*")){
@@ -139,8 +200,8 @@ public class CheckSpecialWordRelationExtractor implements RelationFeaturesExtrac
// Feature feature = new Feature("SpecialWd_BeforeArg2", type);
// feats.add(feature);
// }
- if( textInArg2.matches(".*\\b"+lexicon+"\\b.*")){
- String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon));
+ if( lexiconPattern.matcher( textInArg2 ).matches() ){
+ final String type = String.join( ",", lexiconType.getValue() );
Feature feature = new Feature("SpecialWd_InArg2", type);
feats.add(feature);
}
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeWordTypeExtractor.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeWordTypeExtractor.java
index 6c9894d..2b535c8 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeWordTypeExtractor.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeWordTypeExtractor.java
@@ -18,12 +18,17 @@
*/
package org.apache.ctakes.temporal.ae.feature;
+import java.io.BufferedReader;
import java.io.IOException;
+import java.io.InputStreamReader;
import java.net.URL;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.regex.Pattern;
+import org.apache.ctakes.core.resource.FileLocator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
@@ -37,27 +42,46 @@ import com.google.common.collect.Maps;
import com.google.common.io.Resources;
public class TimeWordTypeExtractor<T extends Annotation> implements FeatureExtractor1<T> {
-
+
private static final String FEATURE_NAME = "TimeWordType";
- private static final String LOOKUP_PATH = "/org/apache/ctakes/temporal/time_word_types.txt";
-
+// private static final String LOOKUP_PATH = "/org/apache/ctakes/temporal/time_word_types.txt";
+ private static final String LOOKUP_PATH = "org/apache/ctakes/temporal/time_word_types.txt";
+
private Map<String, String> wordTypes;
public TimeWordTypeExtractor() throws ResourceInitializationException {
- this.wordTypes = Maps.newHashMap();
- URL url = TimeWordsExtractor.class.getResource(LOOKUP_PATH);
- try {
- for (String line : Resources.readLines(url, Charsets.US_ASCII)) {
- String[] typeAndWord = line.split("\\s+");
- if (typeAndWord.length != 2) {
- throw new IllegalArgumentException("Expected '<type> <word>', found: " + line);
- }
- this.wordTypes.put(typeAndWord[1], typeAndWord[0]);
- }
- } catch (IOException e) {
- throw new ResourceInitializationException(e);
+ wordTypes = new HashMap<>();
+ try ( final BufferedReader reader
+ = new BufferedReader( new InputStreamReader( FileLocator.getAsStream( LOOKUP_PATH ) ) ) ) {
+ reader.lines().forEachOrdered( this::loadWordTypes );
+ } catch ( IOException | IllegalArgumentException multE ) {
+ throw new ResourceInitializationException( multE );
+ }
+
+// this.wordTypes = Maps.newHashMap();
+// URL url = TimeWordsExtractor.class.getResource(LOOKUP_PATH);
+// try {
+// for (String line : Resources.readLines(url, Charsets.US_ASCII)) {
+// String[] typeAndWord = line.split("\\s+");
+// if (typeAndWord.length != 2) {
+// throw new IllegalArgumentException("Expected '<type> <word>', found: " + line);
+// }
+// this.wordTypes.put(typeAndWord[1], typeAndWord[0]);
+// }
+// } catch (IOException e) {
+// throw new ResourceInitializationException(e);
+// }
+ }
+
+ static private final Pattern WHITE_SPACE_PATTERN = Pattern.compile( "\\s+" );
+
+ private void loadWordTypes( final String line ) throws IllegalArgumentException {
+ final String[] typeAndWord = WHITE_SPACE_PATTERN.split( line.trim() );
+ if (typeAndWord.length != 2) {
+ throw new IllegalArgumentException("Expected '<type> <word>', found: " + line);
}
+ wordTypes.put( typeAndWord[1], typeAndWord[0] );
}
@Override