You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/05/16 15:31:16 UTC

svn commit: r1103730 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen: FastTokenClassFeatureGenerator.java FeatureGeneratorUtil.java

Author: joern
Date: Mon May 16 13:31:16 2011
New Revision: 1103730

URL: http://svn.apache.org/viewvc?rev=1103730&view=rev
Log:
OPENNLP-172 Deprecated the fast token class feature generator; its implementation is now used as the default for token class features.

Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java?rev=1103730&r1=1103729&r2=1103730&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java Mon May 16 13:31:16 2011
@@ -24,7 +24,10 @@ import java.util.regex.Pattern;
 
 /**
  * Generates features for the class of the token.
+ * 
+ * @deprecated Use {@link TokenClassFeatureGenerator} instead!
  */
+@Deprecated 
 public class FastTokenClassFeatureGenerator extends FeatureGeneratorAdapter {
 
   private static final String TOKEN_CLASS_PREFIX = "wc";

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java?rev=1103730&r1=1103729&r2=1103730&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java Mon May 16 13:31:16 2011
@@ -18,40 +18,11 @@
 
 package opennlp.tools.util.featuregen;
 
-import java.util.regex.Pattern;
-
 /**
  * This class provides common utilities for feature generation.
  */
 public class FeatureGeneratorUtil {
 
-  private static Pattern lowercase;
-  private static Pattern twoDigits;
-  private static Pattern fourDigits;
-  private static Pattern containsNumber;
-  private static Pattern containsLetter;
-  private static Pattern containsHyphens;
-  private static Pattern containsBackslash;
-  private static Pattern containsComma;
-  private static Pattern containsPeriod;
-  private static Pattern allCaps;
-  private static Pattern capPeriod;
-  private static Pattern initialCap;
-
-  static {
-    lowercase = Pattern.compile("^[a-z]+$");
-    twoDigits = Pattern.compile("^[0-9][0-9]$");
-    fourDigits = Pattern.compile("^[0-9][0-9][0-9][0-9]$");
-    containsNumber = Pattern.compile("[0-9]");
-    containsLetter = Pattern.compile("[a-zA-Z]");
-    containsHyphens = Pattern.compile("-");
-    containsBackslash = Pattern.compile("/");
-    containsComma = Pattern.compile(",");
-    containsPeriod = Pattern.compile("\\.");
-    allCaps = Pattern.compile("^[A-Z]+$");
-    capPeriod = Pattern.compile("^[A-Z]\\.$");
-    initialCap = Pattern.compile("^[A-Z]");
-  }
   /**
    * Generates a class name for the specified token.
    * The classes are as follows where the first matching class is used:
@@ -74,53 +45,6 @@ public class FeatureGeneratorUtil {
    * @return The class name that the specified token belongs in.
    */
   public static String tokenFeature(String token) {
-
-    String feat;
-    if (lowercase.matcher(token).find()) {
-      feat = "lc";
-    }
-    else if (twoDigits.matcher(token).find()) {
-      feat = "2d";
-    }
-    else if (fourDigits.matcher(token).find()) {
-      feat = "4d";
-    }
-    else if (containsNumber.matcher(token).find()) {
-      if (containsLetter.matcher(token).find()) {
-        feat = "an";
-      }
-      else if (containsHyphens.matcher(token).find()) {
-        feat = "dd";
-      }
-      else if (containsBackslash.matcher(token).find()) {
-        feat = "ds";
-      }
-      else if (containsComma.matcher(token).find()) {
-        feat = "dc";
-      }
-      else if (containsPeriod.matcher(token).find()) {
-        feat = "dp";
-      }
-      else {
-        feat = "num";
-      }
-    }
-    else if (allCaps.matcher(token).find() && token.length() == 1) {
-      feat = "sc";
-    }
-    else if (allCaps.matcher(token).find()) {
-      feat = "ac";
-    }
-    else if (capPeriod.matcher(token).find()) {
-      feat = "cp";
-    }
-    else if (initialCap.matcher(token).find()) {
-      feat = "ic";
-    }
-    else {
-      feat = "other";
-    }
-
-    return (feat);
+    return FastTokenClassFeatureGenerator.tokenFeature(token);
   }
 }