You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/10/14 23:51:36 UTC
svn commit: r1631898 -
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyPathRegexpFeatureExtractor.java
Author: tmill
Date: Tue Oct 14 21:51:36 2014
New Revision: 1631898
URL: http://svn.apache.org/r1631898
Log:
Add the feature class whose absence broke the other check-in.
Added:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyPathRegexpFeatureExtractor.java
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyPathRegexpFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyPathRegexpFeatureExtractor.java?rev=1631898&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyPathRegexpFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyPathRegexpFeatureExtractor.java Tue Oct 14 21:51:36 2014
@@ -0,0 +1,65 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Scanner;
+
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.dependency.parser.util.DependencyPath;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+
+/**
+ * Feature extractor that emits one boolean feature for every known dependency-path
+ * pattern found between the head node of a mention and another node of the same sentence.
+ * <p>
+ * The known patterns are loaded once, at construction time, from the file named by
+ * {@link #PATTERN_FILE}. Each non-blank line of that file is tab-separated with the pattern
+ * in its first column. Every distinct pattern gets an index in order of first appearance,
+ * and that index becomes part of the feature name ({@code "DepPathRegexp" + index}).
+ */
+public class DependencyPathRegexpFeatureExtractor implements
+    FeatureExtractor1<IdentifiedAnnotation> {
+
+  /** Location of the pattern file, exactly as handed to {@code FileLocator.locateFile}. */
+  private static final String PATTERN_FILE = "org/apache/ctakes/assertion/models/uncDepPathRegexps.txt";
+
+  /**
+   * Matches the first brace-delimited token of a rendered path so it can be replaced by the
+   * {@code {CONCEPT}} placeholder. Compiled once instead of once per visited node.
+   */
+  private static final java.util.regex.Pattern FIRST_BRACED_TOKEN =
+      java.util.regex.Pattern.compile("\\{[^\\}]+\\}");
+
+  /** Maps each pattern string to the index used in its feature name. */
+  final HashMap<String,Integer> patts = new HashMap<>();
+
+  /**
+   * Loads the pattern file and assigns one feature index per distinct pattern.
+   *
+   * @throws FileNotFoundException if the pattern file cannot be located or opened
+   */
+  public DependencyPathRegexpFeatureExtractor() throws FileNotFoundException{
+    File pathFile = FileLocator.locateFile(PATTERN_FILE);
+    // try-with-resources: the scanner is released even if reading a line fails.
+    try(Scanner scanner = new Scanner(pathFile)){
+      while(scanner.hasNextLine()){
+        String line = scanner.nextLine().trim();
+        if(line.isEmpty()){
+          continue; // tolerate blank lines instead of failing on a missing column
+        }
+        // Only the first column is needed. Earlier revisions of this loop also parsed a
+        // numeric weight from the second column but never used it, so it is ignored here.
+        String feat = line.split("\t")[0];
+        // Keep the index of the first occurrence: re-inserting a repeated pattern with the
+        // current map size would hand the same index to the next new pattern as well.
+        if(!patts.containsKey(feat)){
+          patts.put(feat, patts.size()); // one feat per pattern
+        }
+      }
+    }
+  }
+
+  /**
+   * Builds the pattern features for one mention.
+   * <p>
+   * For every other node of the mention's sentence, the dependency path from the mention's
+   * head node is rendered as a string: newlines become spaces, the first brace-delimited
+   * token becomes {@code {CONCEPT}}, and spaces become underscores. If the resulting string
+   * is one of the loaded patterns, a feature named {@code "DepPathRegexp" + index} with the
+   * value {@code true} is added.
+   *
+   * @param jcas the CAS holding the dependency annotations
+   * @param mention the annotation to extract features for
+   * @return the matching features, possibly empty, never {@code null}
+   * @throws CleartkExtractorException declared by the extractor interface; not thrown directly here
+   */
+  @Override
+  public List<Feature> extract(JCas jcas, IdentifiedAnnotation mention)
+      throws CleartkExtractorException {
+    List<Feature> feats = new ArrayList<>();
+    ConllDependencyNode node = DependencyUtility.getNominalHeadNode(jcas, mention);
+    if(node == null){
+      // NOTE(review): presumably the helper yields null when the mention covers no
+      // dependency nodes -- confirm. Without a head node there is no path to compute.
+      return feats;
+    }
+    List<ConllDependencyNode> sentNodes = DependencyUtility.getDependencyNodes(jcas, DependencyUtility.getSentence(jcas, node));
+    for(ConllDependencyNode neighborNode : sentNodes){
+      if(node == neighborNode) continue; // no path from the head node to itself
+      DependencyPath path = DependencyUtility.getPath(sentNodes, node, neighborNode);
+      String flattened = path.toString().replace('\n', ' ');
+      String pathString = FIRST_BRACED_TOKEN.matcher(flattened).replaceFirst("{CONCEPT}").replace(' ', '_');
+      Integer index = patts.get(pathString);
+      if(index != null){
+        feats.add(new Feature("DepPathRegexp" + index, true)); // one feat per pattern
+      }
+    }
+    return feats;
+  }
+
+}