You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/11/27 15:22:14 UTC

svn commit: r1546044 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/BrownClusterFeatureFunction.java

Author: tmill
Date: Wed Nov 27 14:22:14 2013
New Revision: 1546044

URL: http://svn.apache.org/r1546044
Log:
Added feature function for adding Brown cluster feature along with token features.

Added:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/BrownClusterFeatureFunction.java   (with props)

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/BrownClusterFeatureFunction.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/BrownClusterFeatureFunction.java?rev=1546044&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/BrownClusterFeatureFunction.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/BrownClusterFeatureFunction.java Wed Nov 27 14:22:14 2013
@@ -0,0 +1,102 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.io.FileNotFoundException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.annotation.Nullable;
+
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.function.FeatureFunction;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Feature function that turns a string-valued feature (such as a token) into a
+ * feature holding the Brown cluster recorded for that string.
+ * <p>
+ * The word-to-cluster table is read once, in the constructor, from the resource
+ * {@code org/apache/ctakes/assertion/models/brown_clusters.txt}. Lines that do
+ * not match {@link #linePatt} are skipped.
+ */
+public class BrownClusterFeatureFunction implements FeatureFunction {
+
+  public static final String DEFAULT_NAME = "BrownCluster";
+
+  /**
+   * Expected line layout: digits, whitespace, a token without whitespace,
+   * whitespace, digits. Group 1 is stored as the cluster and group 2 as the word.
+   */
+  public static final Pattern linePatt = Pattern.compile("^(\\d+)\\s+(\\S+)\\s+(\\d+)");
+
+  private static final String CLUSTER_RESOURCE = "org/apache/ctakes/assertion/models/brown_clusters.txt";
+  private static final String NO_CLUSTER = "NoCluster";
+
+  private HashMap<String,String> word2class = null;
+
+  /**
+   * Loads the word-to-cluster table.
+   *
+   * @throws ResourceInitializationException if the cluster file cannot be found
+   *           or an I/O error interrupts reading it
+   */
+  public BrownClusterFeatureFunction() throws ResourceInitializationException{
+    word2class = Maps.newHashMap();
+    Scanner scanner = null;
+    try{
+      scanner = new Scanner(FileLocator.getAsStream(CLUSTER_RESOURCE));
+      while(scanner.hasNextLine()){
+        String line = scanner.nextLine().trim();
+        Matcher m = linePatt.matcher(line);
+        if(m.matches()){
+          word2class.put(m.group(2), m.group(1));
+        }
+      }
+      // Scanner hides read errors and simply reports "no more lines"; surface
+      // them so a truncated table is not used silently.
+      if(scanner.ioException() != null){
+        throw new ResourceInitializationException(scanner.ioException());
+      }
+    }catch(FileNotFoundException e){
+      throw new ResourceInitializationException(e);
+    }finally{
+      // Closing the scanner also closes the stream it wraps.
+      if(scanner != null){
+        scanner.close();
+      }
+    }
+  }
+
+  /**
+   * Looks up the cluster for the value of {@code input}.
+   *
+   * @param input the feature to transform; may be {@code null}
+   * @return a single feature named from {@link #DEFAULT_NAME} and the input's
+   *         name whose value is the cluster, or {@code "NoCluster"} when the
+   *         string is not in the table; an empty list when {@code input} is
+   *         {@code null} or its value is not a {@link String}
+   */
+  @Override
+  public List<Feature> apply(@Nullable Feature input) {
+    // The parameter is declared @Nullable, so guard before dereferencing it.
+    if(input == null){
+      return Collections.emptyList();
+    }
+    Object featureValue = input.getValue();
+    if(!(featureValue instanceof String)){
+      return Collections.emptyList();
+    }
+    String featureName = Feature.createName(DEFAULT_NAME, input.getName());
+    // Stored values are never null (they come from a matched regex group), so
+    // a null lookup result means the word is absent.
+    String cluster = word2class.get(featureValue);
+    return Collections.singletonList(new Feature(featureName, cluster != null ? cluster : NO_CLUSTER));
+  }
+
+}

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/BrownClusterFeatureFunction.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain