You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2017/11/08 16:09:47 UTC
svn commit: r1814594 [1/2] - in /ctakes/trunk:
ctakes-core/src/main/java/org/apache/ctakes/core/patient/ ctakes-coreference/
ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/
ctakes-coreference/src/main/java/org/apache/ctakes/coreferen...
Author: tmill
Date: Wed Nov 8 16:09:47 2017
New Revision: 1814594
URL: http://svn.apache.org/viewvc?rev=1814594&view=rev
Log:
New patient collector model working for coreference evaluation.
Added:
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java
- copied, changed from r1814586, ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableCacheRelationExtractor.java
Removed:
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableHeadTreeCreator.java
ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/struct/MapFactory.java
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java
ctakes/trunk/ctakes-coreference/pom.xml
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/factory/CoreferenceAnnotatorFactory.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterMentionFetcher.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java Wed Nov 8 16:09:47 2017
@@ -61,7 +61,7 @@ abstract public class AbstractPatientCon
* {@inheritDoc}
*/
@Override
- final public void collectionProcessComplete() throws AnalysisEngineProcessException {
+ public void collectionProcessComplete() throws AnalysisEngineProcessException {
super.collectionProcessComplete();
final Collection<String> allPatientIds = PatientNoteStore.getInstance().getPatientIds();
for ( String id : allPatientIds ) {
Modified: ctakes/trunk/ctakes-coreference/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/pom.xml?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/pom.xml (original)
+++ ctakes/trunk/ctakes-coreference/pom.xml Wed Nov 8 16:09:47 2017
@@ -52,6 +52,11 @@
<groupId>org.apache.ctakes</groupId>
<artifactId>ctakes-assertion</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.cleartk</groupId>
+ <artifactId>cleartk-ml-liblinear</artifactId>
+ <version>2.0.0</version>
+ </dependency>
</dependencies>
<build>
<plugins>
Copied: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java (from r1814586, ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java?p2=ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java&p1=ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java&r1=1814586&r2=1814594&rev=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java Wed Nov 8 16:09:47 2017
@@ -34,78 +34,36 @@ import org.cleartk.util.ViewUriUtil;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
-@PipeBitInfo(
- name = "Coreference Score Writer",
- description = "Writes scores of system coreference chains compared to chains in a Gold View.",
- role = PipeBitInfo.Role.SPECIAL,
- dependencies = { PipeBitInfo.TypeProduct.MARKABLE, PipeBitInfo.TypeProduct.COREFERENCE_RELATION }
-)
-public class CoreferenceChainScoringOutput extends JCasAnnotator_ImplBase{
- @ConfigurationParameter(
- name = ConfigParameterConstants.PARAM_OUTPUTDIR,
- mandatory = true,
- description = "Name of chain file in CoNLL format"
- )
- private String outputFilename;
+public class CoreferenceChainCoNLLWriter {
private PrintWriter out = null;
private PrintWriter icOut = null;
-
- public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";
- @ConfigurationParameter(
- name = PARAM_GOLD_VIEW_NAME,
- mandatory = false,
- description = "Name of gold view in jcas"
- )
- private String goldViewName = null;
- boolean isGold;
-
- private int docNum = 0;
-
- @Override
- public void initialize(final UimaContext context) throws ResourceInitializationException{
- super.initialize(context);
-
- try {
- out = new PrintWriter(outputFilename);
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- throw new ResourceInitializationException(e);
- }
-
- if(goldViewName != null) isGold = true;
- else{
- isGold = false;
- try {
- icOut = new PrintWriter(outputFilename + ".icarus");
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- throw new ResourceInitializationException(e);
- }
- }
+ int docNum=0;
+
+ public CoreferenceChainCoNLLWriter(String outputFile) throws FileNotFoundException {
+ out = new PrintWriter(outputFile);
}
- @Override
- public void process(JCas jCas) throws AnalysisEngineProcessException {
+ public void writeCas(JCas jCas) throws AnalysisEngineProcessException {
String myView = jCas.getViewName();
File filename = new File(ViewUriUtil.getURI(jCas));
- JCas chainsCas = null;
- try {
- chainsCas = goldViewName != null ? jCas.getView(goldViewName) : jCas;
- } catch (CASException e) {
- e.printStackTrace();
- throw new AnalysisEngineProcessException(e);
- }
+// JCas chainsCas = null;
+// try {
+// chainsCas = goldViewName != null ? jCas.getView(goldViewName) : jCas;
+// } catch (CASException e) {
+// e.printStackTrace();
+// throw new AnalysisEngineProcessException(e);
+// }
int chainNum = 1;
HashMap<Annotation, Integer> ent2chain = new HashMap<>();
- if(isGold) System.out.println("\nGold chains:");
- else{
- icOut.println(String.format("#begin document (%s); part 000", filename.getPath()));
- System.out.println("\nSystem chains:");
- }
+// if(isGold) System.out.println("\nGold chains:");
+// else{
+// icOut.println(String.format("#begin document (%s); part 000", filename.getPath()));
+// System.out.println("\nChains:");
+// }
- Collection<CollectionTextRelation> rels = JCasUtil.select(chainsCas, CollectionTextRelation.class);
+ Collection<CollectionTextRelation> rels = JCasUtil.select(jCas, CollectionTextRelation.class);
if(rels.size() == 0){
return;
}
@@ -133,7 +91,7 @@ public class CoreferenceChainScoringOutp
members = ((NonEmptyFSList)members).getTail();
System.out.print("Mention: " + mention.getCoveredText().replace("\n", "<CR>"));
System.out.print(" (" + mention.getBegin() + ", " + mention.getEnd() + ")");
- if(!isGold && !mention.getView().getViewName().equals(myView)){
+ if(!mention.getView().getViewName().equals(myView)){
System.out.print("[DOC:" + mention.getView().getViewName() + "]");
}
System.out.print(" -----> ");
@@ -173,7 +131,7 @@ public class CoreferenceChainScoringOutp
if(token.getCoveredText().length() > 1 && token.getCoveredText().endsWith(".")){
lastInd = token.getEnd()-1;
}
- List<Markable> markables = new ArrayList<>(JCasUtil.selectCovering(chainsCas, Markable.class, token.getBegin(), lastInd));
+ List<Markable> markables = new ArrayList<>(JCasUtil.selectCovering(jCas, Markable.class, token.getBegin(), lastInd));
List<Annotation> startMention = new ArrayList<>();
Multiset<Integer> endMention = HashMultiset.create();
List<Integer> wholeMention = new ArrayList<>();
@@ -196,9 +154,9 @@ public class CoreferenceChainScoringOutp
endMention.add(ent2chain.get(markable));
}
- if(!isGold){
- icOut.println(String.format("%d-%d-%d\n", sentId, markable.getBegin(), markable.getEnd()));
- }
+// if(!isGold){
+// icOut.println(String.format("%d-%d-%d\n", sentId, markable.getBegin(), markable.getEnd()));
+// }
}
}
@@ -298,9 +256,9 @@ public class CoreferenceChainScoringOutp
sentId++;
}
}
- if(!isGold){
- icOut.println("#end document");
- }
+// if(!isGold){
+// icOut.println("#end document");
+// }
out.println("#end document " + filename.getPath());
out.flush();
docNum++;
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java Wed Nov 8 16:09:47 2017
@@ -11,6 +11,7 @@ import java.util.List;
import org.apache.ctakes.constituency.parser.util.TreeUtils;
import org.apache.ctakes.core.config.ConfigParameterConstants;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
@@ -19,6 +20,7 @@ import org.apache.ctakes.typesystem.type
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
@@ -87,13 +89,23 @@ public class CoreferenceChainScoringOutp
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
String myView = jCas.getViewName();
- File filename = new File(ViewUriUtil.getURI(jCas));
+ File filename = null;
+ try{
+ filename = new File(ViewUriUtil.getURI(jCas));
+ }catch(Exception e){
+ filename = new File(DocumentIDAnnotationUtil.getDocumentID(jCas));
+ }
+
JCas chainsCas = null;
try {
chainsCas = goldViewName != null ? jCas.getView(goldViewName) : jCas;
- } catch (CASException e) {
- e.printStackTrace();
- throw new AnalysisEngineProcessException(e);
+ } catch (CASRuntimeException|CASException e) {
+ try{
+ chainsCas = goldViewName != null ? jCas.getView(goldViewName + "_" + filename) : jCas;
+ } catch (CASException e2) {
+ e.printStackTrace();
+ throw new AnalysisEngineProcessException(e2);
+ }
}
int chainNum = 1;
HashMap<Annotation, Integer> ent2chain = new HashMap<>();
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java Wed Nov 8 16:09:47 2017
@@ -7,7 +7,9 @@ import org.apache.ctakes.core.util.ListF
import org.apache.ctakes.coreference.ae.features.cluster.*;
import org.apache.ctakes.coreference.ae.pairing.cluster.*;
import org.apache.ctakes.coreference.util.ClusterMentionFetcher;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
import org.apache.ctakes.coreference.util.MarkableUtilities;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
import org.apache.ctakes.temporal.utils.PatientViewsUtil;
@@ -15,6 +17,7 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelationIdentifiedAnnotationRelation;
import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.*;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.utils.struct.CounterMap;
@@ -49,69 +52,69 @@ import static org.apache.ctakes.corefere
@PipeBitInfo(
- name = "Coreference (Clusters)",
- description = "Coreference annotator using mention-synchronous paradigm.",
- dependencies = { BASE_TOKEN, SENTENCE, SECTION, IDENTIFIED_ANNOTATION, MARKABLE },
- products = { COREFERENCE_RELATION }
- )
+ name = "Coreference (Clusters)",
+ description = "Coreference annotator using mention-synchronous paradigm.",
+ dependencies = { BASE_TOKEN, SENTENCE, SECTION, IDENTIFIED_ANNOTATION, MARKABLE },
+ products = { COREFERENCE_RELATION }
+)
public class MentionClusterCoreferenceAnnotator extends CleartkAnnotator<String> {
static private final Logger LOGGER = Logger.getLogger( MentionClusterCoreferenceAnnotator.class.getSimpleName() );
public static final String NO_RELATION_CATEGORY = "-NONE-";
public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE =
- "ProbabilityOfKeepingANegativeExample";
+ "ProbabilityOfKeepingANegativeExample";
@ConfigurationParameter(
- name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
- mandatory = false,
- description = "probability that a negative example should be retained for training")
+ name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+ mandatory = false,
+ description = "probability that a negative example should be retained for training")
protected double probabilityOfKeepingANegativeExample = 0.5;
public static final String PARAM_USE_EXISTING_ENCODERS="UseExistingEncoders";
@ConfigurationParameter(name = PARAM_USE_EXISTING_ENCODERS,
- mandatory=false,
- description = "Whether to use encoders in output directory during data writing; if we are making multiple calls")
+ mandatory=false,
+ description = "Whether to use encoders in output directory during data writing; if we are making multiple calls")
private boolean useExistingEncoders=false;
public static final String PARAM_SINGLE_DOCUMENT = "SingleDocument";
@ConfigurationParameter(
- name = PARAM_SINGLE_DOCUMENT,
- mandatory = false,
- description = "Specify that coreferences should be sought for a single document.",
- defaultValue = "true" )
+ name = PARAM_SINGLE_DOCUMENT,
+ mandatory = false,
+ description = "Specify that coreferences should be sought for a single document.",
+ defaultValue = "true" )
private boolean singleDocument;
protected Random coin = new Random(0);
boolean greedyFirst = true;
-
+
private static DataWriter<String> classDataWriter = null;
-
+
public static AnalysisEngineDescription createDataWriterDescription(
- Class<? extends DataWriter<String>> dataWriterClass,
- File outputDirectory,
- float downsamplingRate) throws ResourceInitializationException {
+ Class<? extends DataWriter<String>> dataWriterClass,
+ File outputDirectory,
+ float downsamplingRate) throws ResourceInitializationException {
return AnalysisEngineFactory.createEngineDescription(
- MentionClusterCoreferenceAnnotator.class,
- CleartkAnnotator.PARAM_IS_TRAINING,
- true,
- MentionClusterCoreferenceAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
- downsamplingRate,
- DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
- dataWriterClass,
- DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
- outputDirectory,
- MentionClusterCoreferenceAnnotator.PARAM_SINGLE_DOCUMENT,
- false);
+ MentionClusterCoreferenceAnnotator.class,
+ CleartkAnnotator.PARAM_IS_TRAINING,
+ true,
+ MentionClusterCoreferenceAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+ downsamplingRate,
+ DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+ dataWriterClass,
+ DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ outputDirectory,
+ MentionClusterCoreferenceAnnotator.PARAM_SINGLE_DOCUMENT,
+ false);
}
public static AnalysisEngineDescription createAnnotatorDescription(
- String modelPath) throws ResourceInitializationException {
+ String modelPath) throws ResourceInitializationException {
return AnalysisEngineFactory.createEngineDescription(
- MentionClusterCoreferenceAnnotator.class,
- CleartkAnnotator.PARAM_IS_TRAINING,
- false,
- GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
- modelPath);
+ MentionClusterCoreferenceAnnotator.class,
+ CleartkAnnotator.PARAM_IS_TRAINING,
+ false,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ modelPath);
}
public static AnalysisEngineDescription createMultidocAnnotatorDescription(
@@ -129,9 +132,9 @@ public class MentionClusterCoreferenceAn
private List<RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>> relationExtractors = this.getFeatureExtractors();
private List<FeatureExtractor1<Markable>> mentionExtractors = this.getMentionExtractors();
private List<ClusterMentionPairer_ImplBase> pairExtractors = this.getPairExtractors();
-
+
// private Set<String> markableStrings = null;
-
+
protected List<RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>> getFeatureExtractors() {
List<RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>> extractors = new ArrayList<>();
extractors.add(new MentionClusterAgreementFeaturesExtractor());
@@ -143,9 +146,9 @@ public class MentionClusterCoreferenceAn
extractors.add(new MentionClusterSalienceFeaturesExtractor());
extractors.add(new MentionClusterAttributeFeaturesExtractor());
// extractors.add(new MentionClusterAttributeVectorExtractor()); // does nothing yet
-
+
// extractors.add(new MentionClusterDistanceFeaturesExtractor());
-
+
try {
// extractors.add(new MentionClusterDistSemExtractor("org/apache/ctakes/coreference/distsem/mimic_vectors.txt"));
// extractors.add(new MentionClusterDistSemExtractor("org/apache/ctakes/coreference/distsem/deps.words"));
@@ -153,10 +156,10 @@ public class MentionClusterCoreferenceAn
} catch (IOException e) {
e.printStackTrace();
}
-
+
return extractors;
}
-
+
protected List<FeatureExtractor1<Markable>> getMentionExtractors(){
List<FeatureExtractor1<Markable>> extractors = new ArrayList<>();
// mention features from pairwise system:
@@ -175,7 +178,7 @@ public class MentionClusterCoreferenceAn
return extractors;
}
-
+
protected List<ClusterMentionPairer_ImplBase> getPairExtractors(){
List<ClusterMentionPairer_ImplBase> pairers = new ArrayList<>();
int sentDist = 5;
@@ -186,24 +189,25 @@ public class MentionClusterCoreferenceAn
pairers.add(new PreviousDocumentPairer());
return pairers;
}
-
+
protected Iterable<CollectionTextRelationIdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
- JCas jcas,
- Markable mention){
- LinkedHashSet<CollectionTextRelationIdentifiedAnnotationPair> pairs = new LinkedHashSet<>();
+ JCas jcas,
+ Markable mention){
+ LinkedHashSet<CollectionTextRelationIdentifiedAnnotationPair> pairs = new LinkedHashSet<>();
for(ClusterMentionPairer_ImplBase pairer : this.pairExtractors){
pairs.addAll(pairer.getPairs(jcas, mention));
}
-
+
return pairs;
}
-
- private void resetPairers(JCas jcas){
+
+ private void resetPairers(JCas jcas, Map<Markable,ConllDependencyNode> cache){
for(ClusterMentionPairer_ImplBase pairer : this.pairExtractors){
pairer.reset(jcas);
+ pairer.setCache(cache);
}
}
-
+
@Override
public void initialize( final UimaContext context ) throws ResourceInitializationException {
LOGGER.info( "Initializing ..." );
@@ -217,7 +221,7 @@ public class MentionClusterCoreferenceAn
LOGGER.info( "Finished." );
}
- public void notYetProcess( final JCas jCas ) throws AnalysisEngineProcessException {
+ public void process( final JCas jCas ) throws AnalysisEngineProcessException {
//this.dataWriter.write(new Instance<String>("#DEBUG " + ViewUriUtil.getURI(docCas)));
LOGGER.info( "Finding Coreferences ..." );
@@ -239,63 +243,38 @@ public class MentionClusterCoreferenceAn
LOGGER.info( "Finished." );
}
-
-
-
- @Override
- public void process( final JCas jCas ) throws AnalysisEngineProcessException {
-
- //this.dataWriter.write(new Instance<String>("#DEBUG " + ViewUriUtil.getURI(docCas)));
- LOGGER.info( "Finding Coreferences ..." );
-
- if ( singleDocument ) {
- processDocument( jCas );
- LOGGER.info( "Finished." );
- return;
- }
-
- int numDocs;
- try {
- numDocs = Integer.valueOf( jCas.getView( PatientViewsUtil.NUM_DOCS_NAME ).getDocumentText() );
- } catch ( NumberFormatException | CASException e ) {
- // TODO remove stack trace when ready
- e.printStackTrace();
- throw new AnalysisEngineProcessException( e );
- }
- try ( DotLogger dotter = new DotLogger() ) {
- for ( int docNum = 0; docNum < numDocs; docNum++ ) {
- JCas docCas;
- try {
- docCas = jCas.getView( PatientViewsUtil.getViewName( docNum ) );
- } catch ( CASException casE ) {
- // TODO remove stack trace when ready
- casE.printStackTrace();
- throw new AnalysisEngineProcessException( casE );
- }
- processDocument( docCas );
- }
- } catch ( IOException ioE ) {
- LOGGER.error( ioE.getMessage() );
- }
- LOGGER.info( "Finished." );
- }
-
private void processDocument( final JCas jCas ) throws AnalysisEngineProcessException {
// lookup from pair of annotations to binary text relation
// note: assumes that there will be at most one relation per pair
- this.resetPairers( jCas );
Map<CollectionTextRelationIdentifiedAnnotationPair, CollectionTextRelationIdentifiedAnnotationRelation>
- relationLookup;
+ relationLookup;
if ( this.isTraining() ) {
relationLookup = ClusterMentionFetcher.getPairRelations( jCas );
} else {
relationLookup = new HashMap<>();
}
- final Map<Segment, Collection<Markable>> segmentMarkables = JCasUtil.indexCovered( jCas, Segment.class, Markable.class );
- for ( Collection<Markable> markables : segmentMarkables.values() ) {
- for ( Markable mention : markables ) {
-//System.out.println( "MCCA Markable: " + mention.getCoveredText() + " :" + mention.getBegin() + "," + mention.getEnd() );
+ Map<Markable,ConllDependencyNode> depHeadMap = new HashMap<>();
+ for(Markable m: JCasUtil.select(jCas, Markable.class)){
+ ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, m);
+ depHeadMap.put(m, headNode);
+ }
+ for(RelationFeaturesExtractor featEx : this.relationExtractors){
+ if(featEx instanceof MarkableCacheRelationExtractor){
+ ((MarkableCacheRelationExtractor)featEx).setCache(depHeadMap);
+ }
+ }
+ for(FeatureExtractor1 featEx : this.mentionExtractors){
+ if(featEx instanceof MarkableCacheRelationExtractor){
+ ((MarkableCacheRelationExtractor)featEx).setCache(depHeadMap);
+ }
+ }
+ this.resetPairers( jCas, depHeadMap );
+
+ final Map<Segment, Collection<Markable>> segmentMarkables = JCasUtil.indexCovered( jCas, Segment.class, Markable.class );
+ for ( Segment segment : JCasUtil.select(jCas, Segment.class) ) {
+ for ( Markable mention : segmentMarkables.get(segment) ) {
+// System.out.println( "MCCA Markable: " + mention.getCoveredText() + " :" + mention.getBegin() + "," + mention.getEnd() );
// ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, mention);
boolean singleton = true;
double maxScore = 0.0;
@@ -306,7 +285,8 @@ public class MentionClusterCoreferenceAn
CollectionTextRelation cluster = pair.getCluster();
Markable firstElement = JCasUtil.select(cluster.getMembers(), Markable.class).iterator().next();
String clusterHeadView = firstElement.getView().getViewName();
-//System.out.println( " MCCA Pair Cluster: " + pair.getCluster().getCategory() );
+// System.out.println( " MCCA Pair Cluster: " + pair.getCluster().getCategory() );
+// System.out.println("MCCA Cluster head: " + firstElement.getCoveredText() + " :" + firstElement.getBegin() + "," + firstElement.getEnd());
// apply all the feature extractors to extract the list of features
List<Feature> features = new ArrayList<>();
for ( RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> extractor : this.relationExtractors ) {
@@ -406,21 +386,21 @@ public class MentionClusterCoreferenceAn
createEventClusters( jCas );
}
-
-
+
+
/**
* Looks up the arguments in the specified lookup table and converts the
* relation into a label for classification
- *
+ *
* @return If this category should not be processed for training return
* <i>null</i> otherwise it returns the label sent to the datawriter
*/
protected String getRelationCategory(
- Map<CollectionTextRelationIdentifiedAnnotationPair, CollectionTextRelationIdentifiedAnnotationRelation> relationLookup,
- CollectionTextRelation cluster,
- IdentifiedAnnotation mention) {
- CollectionTextRelationIdentifiedAnnotationRelation relation =
- relationLookup.get(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention));
+ Map<CollectionTextRelationIdentifiedAnnotationPair, CollectionTextRelationIdentifiedAnnotationRelation> relationLookup,
+ CollectionTextRelation cluster,
+ IdentifiedAnnotation mention) {
+ CollectionTextRelationIdentifiedAnnotationRelation relation =
+ relationLookup.get(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention));
String category;
if (relation != null) {
category = relation.getCategory();
@@ -436,7 +416,7 @@ public class MentionClusterCoreferenceAn
* Predict an outcome given a set of features. By default, this simply
* delegates to the object's <code>classifier</code>. Subclasses may override
* this method to implement more complex classification procedures.
- *
+ *
* @param features
* The features to be classified.
* @return The predicted outcome (label) for the features.
@@ -449,7 +429,7 @@ public class MentionClusterCoreferenceAn
* Create a UIMA relation type based on arguments and the relation label. This
* allows subclasses to create/define their own types: e.g. coreference can
* create CoreferenceRelation instead of BinaryTextRelation
- *
+ *
* @param jCas
* - JCas object, needed to create new UIMA types
// * @param arg1
@@ -460,11 +440,11 @@ public class MentionClusterCoreferenceAn
* - Name of relation
*/
protected void createRelation(
- JCas jCas,
- CollectionTextRelation cluster,
- IdentifiedAnnotation mention,
- String predictedCategory,
- Double confidence) {
+ JCas jCas,
+ CollectionTextRelation cluster,
+ IdentifiedAnnotation mention,
+ String predictedCategory,
+ Double confidence) {
// add the relation to the CAS
CollectionTextRelationIdentifiedAnnotationRelation relation = new CollectionTextRelationIdentifiedAnnotationRelation(jCas);
relation.setCluster(cluster);
@@ -472,25 +452,25 @@ public class MentionClusterCoreferenceAn
relation.setCategory(predictedCategory);
relation.setConfidence(confidence);
relation.addToIndexes();
-
+
// RelationArgument arg = new RelationArgument(jCas);
// arg.setArgument(mention);
- ListFactory.append(jCas, cluster.getMembers(), mention);
+ ListFactory.append(jCas, cluster.getMembers(), mention);
}
/**
* Create the set of Event types for every chain we found in the document.
* Event is a non-Annotation type (i.e., no span) that has its own attributes
* but points to an FSArray of mentions which each have their own attributes.
- *
+ *
* @param jCas
* - JCas object, needed to create UIMA types
- * @throws AnalysisEngineProcessException
+ * @throws AnalysisEngineProcessException
*/
private static void createEventClusters(JCas jCas) throws AnalysisEngineProcessException{
// First, find the largest span identified annotation that shares a headword with the markable
// do that by finding the head of the markable, then finding the identifiedannotations that cover it:
-
+
Map<Markable, List<IdentifiedAnnotation>> markable2annotations = MarkableUtilities.indexCoveringUmlsAnnotations(jCas);
for(CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)){
CounterMap<Class<? extends IdentifiedAnnotation>> headCounts = new CounterMap<>();
@@ -515,10 +495,10 @@ public class MentionClusterCoreferenceAn
element = new Event(jCas);
}else{
Class<? extends IdentifiedAnnotation> mostCommon = headCounts.entrySet().stream()
- .sorted(Map.Entry.<Class<? extends IdentifiedAnnotation>,Integer>comparingByValue().reversed())
- .limit(1)
- .map(f -> f.getKey())
- .collect(Collectors.toList()).get(0);
+ .sorted(Map.Entry.<Class<? extends IdentifiedAnnotation>,Integer>comparingByValue().reversed())
+ .limit(1)
+ .map(f -> f.getKey())
+ .collect(Collectors.toList()).get(0);
if(mostCommon.equals(DiseaseDisorderMention.class)){
element = new DiseaseDisorder(jCas);
}else if(mostCommon.equals(ProcedureMention.class)){
@@ -541,19 +521,19 @@ public class MentionClusterCoreferenceAn
private static void removeSingletonClusters(JCas jcas){
List<CollectionTextRelation> toRemove = new ArrayList<>();
- for(CollectionTextRelation rel : JCasUtil.select(jcas, CollectionTextRelation.class)){
+ for(CollectionTextRelation rel : JCasUtil.select(jcas, CollectionTextRelation.class)){
NonEmptyFSList head = (NonEmptyFSList) rel.getMembers();
if(head.getTail() instanceof EmptyFSList){
toRemove.add(rel);
}
}
-
+
for(CollectionTextRelation rel : toRemove){
rel.removeFromIndexes();
}
}
-
-
+
+
// private static final boolean dominates(Annotation arg1, Annotation arg2) {
// return (arg1.getBegin() <= arg2.getBegin() && arg1.getEnd() >= arg2.getEnd());
// }
@@ -599,7 +579,7 @@ public class MentionClusterCoreferenceAn
return bestEnts;
}
*/
-
+
public Map<HashableArguments, Double> getMarkablePairScores(JCas jCas){
Map<HashableArguments, Double> scoreMap = new HashMap<>();
for(CoreferenceRelation reln : JCasUtil.select(jCas, CoreferenceRelation.class)){
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java Wed Nov 8 16:09:47 2017
@@ -74,5 +74,9 @@ public class PatientMentionClusterCorefe
_delegate.process( patientJcas );
}
-
+ @Override
+ public void collectionProcessComplete() throws AnalysisEngineProcessException { // end-of-collection callback
+ super.collectionProcessComplete(); // the superclass implementation runs first
+ _delegate.collectionProcessComplete(); // then the same notification is passed on to _delegate
+ }
}
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java Wed Nov 8 16:09:47 2017
@@ -1,24 +1,26 @@
package org.apache.ctakes.coreference.ae.features;
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
-
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.utils.struct.MapFactory;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;
-public class TokenFeatureExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation> {
+public class TokenFeatureExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>, MarkableCacheRelationExtractor {
+
+ private Map<Markable,ConllDependencyNode> cache = null;
@Override
public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
@@ -36,7 +38,7 @@ public class TokenFeatureExtractor imple
feats.add(new Feature("TOKEN_DEF1", isDefinite(s1)));
feats.add(new Feature("TOKEN_DEF2", isDefinite(s2)));
feats.add(new Feature("TOKEN_NUMAGREE",
- numberSingular(jCas, arg1, s1) == numberSingular(jCas, arg2, s2)));
+ numberSingular(jCas, arg1, s1, cache.get((Markable)arg1)) == numberSingular(jCas, arg2, s2, cache.get((Markable)arg2))));
String gen1 = getGender(s1);
String gen2 = getGender(s2);
@@ -102,8 +104,7 @@ public class TokenFeatureExtractor imple
// FYI - old code used treebanknode types and found head using head rules filled in by the parser
// not sure if there is an appreciable difference...
- public static boolean numberSingular(JCas jcas, Annotation arg, String s1){
- ConllDependencyNode head = MapFactory.get(getKey(jcas), arg);
+ public static boolean numberSingular(JCas jcas, Annotation arg, String s1, ConllDependencyNode head){
// List<BaseToken> tokens = new ArrayList<>(JCasUtil.selectCovered(BaseToken.class, arg));
// for (int i = tokens.size()-1; i >=0; i--){
// BaseToken t = tokens.get(i);
@@ -166,4 +167,9 @@ public class TokenFeatureExtractor imple
public static boolean isHistory(IdentifiedAnnotation mention){ // true when mention.getHistoryOf() equals CONST.NE_HISTORY_OF_PRESENT
return mention.getHistoryOf() == CONST.NE_HISTORY_OF_PRESENT;
}
+
+ @Override
+ public void setCache(Map<Markable, ConllDependencyNode> cache) { // supplies the Markable -> ConllDependencyNode map that extract() hands to numberSingular()
+ this.cache = cache; // stored by reference, not copied. NOTE(review): extract() appears to call cache.get() with no null guard, so this must be called first - confirm
+ }
}
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java Wed Nov 8 16:09:47 2017
@@ -7,10 +7,13 @@ import static org.apache.ctakes.corefere
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import org.apache.ctakes.core.util.ListIterable;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Markable;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -19,17 +22,22 @@ import org.cleartk.ml.Feature;
import org.cleartk.ml.feature.extractor.CleartkExtractorException;
import org.cleartk.ml.feature.extractor.FeatureExtractor1;
-public class MentionClusterAgreementFeaturesExtractor implements RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>, FeatureExtractor1<Markable> {
+public class MentionClusterAgreementFeaturesExtractor implements RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>, FeatureExtractor1<Markable>, MarkableCacheRelationExtractor {
+
+ private Map<Markable, ConllDependencyNode> cache = null;
public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
+ if(cache == null){
+ throw new RuntimeException("This extractor requires a call to setCache()");
+ }
List<Feature> features = new ArrayList<>();
String s = mention.getCoveredText().toLowerCase();
boolean isDem = isDemonstrative(s);
boolean isDef = isDefinite(s);
String gender = getGender(s);
- boolean singular = numberSingular(jCas, mention, s);
+ boolean singular = numberSingular(jCas, mention, s, cache.get(mention));
boolean matchDem = false;
boolean matchDef = false;
@@ -55,7 +63,7 @@ public class MentionClusterAgreementFeat
if(!matchGender && getGender(m).equals(gender)){
matchGender = true;
}
- if(!matchNumber && numberSingular(jCas, member, m) == singular){
+ if(!matchNumber && numberSingular(jCas, member, m, cache.get(member)) == singular){
matchNumber = true;
}
}
@@ -70,6 +78,9 @@ public class MentionClusterAgreementFeat
@Override
public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+ if(cache == null){
+ throw new RuntimeException("This extractor requires a call to setCache()");
+ }
List<Feature> features = new ArrayList<>();
String s = mention.getCoveredText().toLowerCase();
@@ -82,9 +93,16 @@ public class MentionClusterAgreementFeat
String gender = getGender(s);
features.add(new Feature("MC_MENTION_GENDER", gender));
- boolean singular = numberSingular(jCas, mention, s);
+ boolean singular = numberSingular(jCas, mention, s, cache.get(mention));
features.add(new Feature("MC_MENTION_NUMBER", singular));
return features;
}
+
+ @Override
+ public void setCache(Map<Markable, ConllDependencyNode> cache) { // supplies the Markable -> ConllDependencyNode map; both extract() overloads throw RuntimeException while it is still null
+ this.cache = cache; // stored by reference, not copied
+ }
+
+
}
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java Wed Nov 8 16:09:47 2017
@@ -1,20 +1,15 @@
package org.apache.ctakes.coreference.ae.features.cluster;
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
import org.apache.ctakes.core.util.ListIterable;
import org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Markable;
-import org.apache.ctakes.utils.struct.MapFactory;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.cleartk.ml.Feature;
@@ -22,20 +17,27 @@ import org.cleartk.ml.feature.extractor.
import org.cleartk.ml.feature.extractor.FeatureExtractor1;
public class MentionClusterDepHeadExtractor implements
- RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
+ RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable>,
+ MarkableCacheRelationExtractor{
+
+ Map<Markable,ConllDependencyNode> cache = null;
@Override
public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
List<Feature> feats = new ArrayList<>();
-
- ConllDependencyNode mentionHead = MapFactory.get(getKey(jCas), mention);
+
+ if(cache == null){
+ throw new RuntimeException("This extractor requires a cached Markable->ConllDependencyNode map to be set with setCache()");
+ }
+
+ ConllDependencyNode mentionHead = cache.get(mention);
Set<String> memberHeads = new HashSet<>();
Set<String> memberPaths = new HashSet<>();
for(Markable member : new ListIterable<Markable>(cluster.getMembers())){
if(member.getBegin() > mention.getEnd()) break;
- ConllDependencyNode memberHead = MapFactory.get(getKey(jCas), member);
+ ConllDependencyNode memberHead = cache.get(member);
if(memberHead != null){
String headWord = memberHead.getCoveredText().toLowerCase();
memberHeads.add(headWord);
@@ -64,8 +66,12 @@ public class MentionClusterDepHeadExtrac
@Override
public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+ if(cache == null){
+ throw new RuntimeException("This extractor requires a cached Markable->ConllDependencyNode map to be set with setCache()");
+ }
+
List<Feature> feats = new ArrayList<>();
- ConllDependencyNode mentionHead = MapFactory.get(getKey(jCas), mention);
+ ConllDependencyNode mentionHead = cache.get(mention);
if(mentionHead != null){
feats.add(new Feature("MentionRel", mentionHead.getDeprel()));
@@ -74,4 +80,8 @@ public class MentionClusterDepHeadExtrac
return feats;
}
+ @Override
+ public void setCache(Map<Markable, ConllDependencyNode> cache) { // supplies the Markable -> ConllDependencyNode map; both extract() overloads throw RuntimeException while it is still null
+ this.cache = cache; // stored by reference, not copied
+ }
}
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java Wed Nov 8 16:09:47 2017
@@ -1,6 +1,5 @@
package org.apache.ctakes.coreference.ae.features.cluster;
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
import static org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor.contentWords;
import static org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor.endMatch;
import static org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor.soonMatch;
@@ -11,25 +10,33 @@ import static org.apache.ctakes.corefere
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import org.apache.ctakes.core.util.ListIterable;
import org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
import org.apache.ctakes.utils.struct.CounterMap;
-import org.apache.ctakes.utils.struct.MapFactory;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.cleartk.ml.Feature;
public class MentionClusterStringFeaturesExtractor implements
- RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+ RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>,
+ MarkableCacheRelationExtractor{
+
+ private Map<Markable, ConllDependencyNode> cache = null;
public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
+ if(cache == null){
+ throw new RuntimeException("This extractor requires a cached Markable->ConllDependencyNode map to be set with setCache()");
+ }
List<Feature> feats = new ArrayList<>();
CounterMap<String> featCounts = new CounterMap<>();
@@ -38,7 +45,7 @@ public class MentionClusterStringFeature
String m = mention.getCoveredText();
Set<String> mentionWords = contentWords(mention);
Set<String> nonHeadMentionWords = new HashSet<>(mentionWords);
- ConllDependencyNode mentionHead = MapFactory.get(getKey(jCas), mention);
+ ConllDependencyNode mentionHead = cache.get(mention);
String mentionHeadString = null;
if(mentionHead != null){
@@ -62,7 +69,7 @@ public class MentionClusterStringFeature
String s = member.getCoveredText();
Set<String> memberWords = contentWords(member);
Set<String> nonHeadMemberWords = new HashSet<>(memberWords);
- ConllDependencyNode memberHead = MapFactory.get(getKey(jCas), member);
+ ConllDependencyNode memberHead = cache.get(member);
String memberHeadString = null;
if(memberHead != null){
memberHeadString = memberHead.getCoveredText().toLowerCase();
@@ -110,4 +117,8 @@ public class MentionClusterStringFeature
return count;
}
+ @Override
+ public void setCache(Map<Markable, ConllDependencyNode> cache) { // supplies the Markable -> ConllDependencyNode map; extract() throws RuntimeException while it is still null
+ this.cache = cache; // stored by reference, not copied
+ }
}
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java Wed Nov 8 16:09:47 2017
@@ -1,6 +1,5 @@
package org.apache.ctakes.coreference.ae.features.cluster;
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
import static org.apache.ctakes.coreference.ae.features.UMLSFeatureExtractor.alias;
import static org.apache.ctakes.coreference.ae.features.UMLSFeatureExtractor.getDocId;
@@ -12,6 +11,8 @@ import java.util.Map;
import java.util.Set;
import org.apache.ctakes.core.util.ListIterable;
+import org.apache.ctakes.coreference.util.HashableMarkable;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
@@ -20,7 +21,6 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Markable;
-import org.apache.ctakes.utils.struct.MapFactory;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
@@ -30,14 +30,21 @@ import org.cleartk.ml.feature.extractor.
import org.cleartk.ml.feature.extractor.FeatureExtractor1;
public class MentionClusterUMLSFeatureExtractor implements
- RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
+ RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable>,
+ MarkableCacheRelationExtractor{
String docId = null;
Map<ConllDependencyNode,Collection<IdentifiedAnnotation>> coveringMap = null;
+ Map<Markable, ConllDependencyNode> cache = null;
@Override
public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
+
+ if(cache == null){
+ throw new RuntimeException("This extractor requires a Markable cache.");
+ }
+
List<Feature> feats = new ArrayList<>();
Set<String> trueFeats = new HashSet<>();
@@ -46,7 +53,7 @@ public class MentionClusterUMLSFeatureEx
coveringMap = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
}
- ConllDependencyNode head = MapFactory.get(getKey(jCas), mention);
+ ConllDependencyNode head = cache.get(mention);
if(head != null){
List<IdentifiedAnnotation> rmList = new ArrayList<>();
@@ -63,7 +70,7 @@ public class MentionClusterUMLSFeatureEx
Set<IdentifiedAnnotation> clusterEnts = new HashSet<>();
for(Markable member : new ListIterable<Markable>(cluster.getMembers())){
- ConllDependencyNode memberHead = MapFactory.get(getKey(jCas), member);
+ ConllDependencyNode memberHead = cache.get(member);
rmList.clear();
// get the named entities covering this cluster member:
List<IdentifiedAnnotation> ents2 = new ArrayList<>(coveringMap.get(memberHead)); //JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, head2.getBegin(), head2.getEnd());
@@ -189,7 +196,7 @@ public class MentionClusterUMLSFeatureEx
coveringMap = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
}
- ConllDependencyNode head = MapFactory.get(getKey(jCas), mention);
+ ConllDependencyNode head = cache.get(mention);
List<IdentifiedAnnotation> rmList = new ArrayList<>();
// get the entities covering this markable:
@@ -215,4 +222,8 @@ public class MentionClusterUMLSFeatureEx
return feats;
}
+ @Override
+ public void setCache(Map<Markable, ConllDependencyNode> cache) { // supplies the Markable -> ConllDependencyNode map; the cluster/mention extract() throws RuntimeException while it is still null
+ this.cache = cache; // stored by reference, not copied. NOTE(review): the single-Markable extract() overload shows no equivalent null guard in this diff - confirm
+ }
}
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java Wed Nov 8 16:09:47 2017
@@ -2,25 +2,26 @@ package org.apache.ctakes.coreference.ae
//import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
import org.apache.ctakes.coreference.ae.pairing.AnnotationPairer;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Markable;
-import org.apache.ctakes.utils.struct.MapFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import java.util.*;
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
//import org.apache.ctakes.dependency.parser.util.DependencyUtility;
-public abstract class ClusterMentionPairer_ImplBase implements AnnotationPairer<Markable, CollectionTextRelationIdentifiedAnnotationPair> {
+public abstract class ClusterMentionPairer_ImplBase implements AnnotationPairer<Markable, CollectionTextRelationIdentifiedAnnotationPair>, MarkableCacheRelationExtractor {
public abstract List<CollectionTextRelationIdentifiedAnnotationPair> getPairs(JCas jcas, Markable m);
private Map<ConllDependencyNode,Collection<IdentifiedAnnotation>> nodeEntMap = null;
+ private Map<Markable,ConllDependencyNode> cache = null;
@Override
public void reset(JCas jcas){
@@ -39,7 +40,7 @@ public abstract class ClusterMentionPair
Set<String> bestEnts = new HashSet<>();
IdentifiedAnnotation bestEnt = null;
Set<IdentifiedAnnotation> otherBestEnts = new HashSet<>();
- ConllDependencyNode head = MapFactory.get(getKey(jcas), markable);
+ ConllDependencyNode head = cache.get(markable);
if ( head == null ) {
return Collections.emptySet();
}
@@ -47,7 +48,7 @@ public abstract class ClusterMentionPair
Collection<IdentifiedAnnotation> coveringEnts = nodeEntMap.get(head);
for(IdentifiedAnnotation ent : coveringEnts){
if(ent.getOntologyConceptArr() == null) continue; // skip non-umls entities.
- ConllDependencyNode entHead = MapFactory.get(getKey(jcas), ent);
+ ConllDependencyNode entHead = DependencyUtility.getNominalHeadNode(jcas, ent);
if(entHead == head){
if(bestEnt == null){
bestEnt = ent;
@@ -74,4 +75,9 @@ public abstract class ClusterMentionPair
protected static final boolean dominates(Annotation arg1, Annotation arg2) { // true when arg1's [begin, end] span encloses arg2's; equal bounds count as enclosing
return (arg1.getBegin() <= arg2.getBegin() && arg1.getEnd() >= arg2.getEnd());
}
+
+ @Override
+ public void setCache(Map<Markable,ConllDependencyNode> cache){ // supplies the Markable -> ConllDependencyNode map used to look up a markable's head node
+ this.cache = cache; // stored by reference, not copied. NOTE(review): the head lookup calls cache.get() with no visible null guard, so this must be called first - confirm
+ }
}