You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2021/02/15 22:33:48 UTC
svn commit: r1886553 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors:
ContextWordWindowExtractor.java NegationDependencyFeatureExtractor.java
Author: tmill
Date: Mon Feb 15 22:33:47 2021
New Revision: 1886553
URL: http://svn.apache.org/viewvc?rev=1886553&view=rev
Log:
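Optimizing assertion attribute feature extractors: cache a per-document indexCovering result instead of calling selectCovering for every mention, and collect a sentence's dependency nodes by walking the tree up to its root.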
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ContextWordWindowExtractor.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/NegationDependencyFeatureExtractor.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ContextWordWindowExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ContextWordWindowExtractor.java?rev=1886553&r1=1886552&r2=1886553&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ContextWordWindowExtractor.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ContextWordWindowExtractor.java Mon Feb 15 22:33:47 2021
@@ -20,13 +20,11 @@ package org.apache.ctakes.assertion.medf
import java.io.IOException;
import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Scanner;
+import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -41,6 +39,9 @@ public class ContextWordWindowExtractor
private HashMap<String,Double> termVals = null;
private static final Pattern linePatt = Pattern.compile("^([^ ]+) : (.+)$");
private static double[] weights = new double[50];
+ private Map<IdentifiedAnnotation, Collection<Sentence>> cachedIndex = new HashMap<>();
+ private String cachedDocId = "__NONE__";
+
static{
weights[0] = 1.0;
for(int i = 1; i < weights.length; i++){
@@ -84,8 +85,13 @@ public class ContextWordWindowExtractor
@Override
public List<Feature> extract(JCas view, IdentifiedAnnotation mention)
throws CleartkExtractorException {
+ if(!DocIdUtil.getDocumentID(view).equals(cachedDocId)){
+ cachedIndex = JCasUtil.indexCovering(view, IdentifiedAnnotation.class, Sentence.class );
+ cachedDocId = DocIdUtil.getDocumentID(view);
+ }
ArrayList<Feature> feats = new ArrayList<Feature>();
- List<Sentence> sents = JCasUtil.selectCovering(view, Sentence.class, mention.getBegin(), mention.getEnd());
+ List<Sentence> sents = new ArrayList<>(cachedIndex.get(mention));
+
if(sents.size() == 0) return feats;
Sentence sent = sents.get(0);
List<BaseToken> tokens = JCasUtil.selectCovered(BaseToken.class, sent);
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/NegationDependencyFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/NegationDependencyFeatureExtractor.java?rev=1886553&r1=1886552&r2=1886553&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/NegationDependencyFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/NegationDependencyFeatureExtractor.java Mon Feb 15 22:33:47 2021
@@ -19,9 +19,12 @@
package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
+import java.util.Map;
import org.apache.ctakes.assertion.util.NegationManualDepContextAnalyzer;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -45,22 +48,27 @@ public class NegationDependencyFeatureEx
public List<Feature> extract(JCas jcas, IdentifiedAnnotation focusAnnotation)
throws CleartkExtractorException {
List<Feature> feats = new ArrayList<>();
- Sentence sent = null;
-
- List<Sentence> sents = JCasUtil.selectCovering(jcas, Sentence.class, focusAnnotation.getBegin(), focusAnnotation.getEnd());
- if(sents != null && sents.size() > 0){
- sent = sents.get(0);
- }else{
+
+ // get the dependency node for the annotation we're annotating
+ ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jcas, focusAnnotation);
+
+ // walk up the tree to the root, which has a span of the whole sentence
+ ConllDependencyNode rootNode = headNode;
+ while(rootNode.getId() != 0){
+ rootNode = rootNode.getHead();
+ }
+ // use the root node to get all the nodes for this sentence
+ List<ConllDependencyNode> nodes = DependencyUtility.getDependencyNodes(jcas, rootNode);
+ if(nodes.size() > 400){
+ // most things with hundreds of tokens are not in fact syntactically interesting, but they take a really
+ // long time to process, so we can skip them.
return feats;
}
-
- List<ConllDependencyNode> nodes = DependencyUtility.getDependencyNodes(jcas, sent);
- ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jcas, focusAnnotation);
try {
boolean[] regexFeats = conAnal.findNegationContext(nodes, headNode);
for(int j = 0; j < regexFeats.length; j++){
if(regexFeats[j]){
- feats.add(new Feature("DepPath_" + conAnal.getRegexName(j))); //"NEG_DEP_REGEX_"+j));
+ feats.add(new Feature("DepPath_" + conAnal.getRegexName(j)));
}
}
} catch (Exception e) {