You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/05/16 15:31:16 UTC
svn commit: r1103730 - in
/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen:
FastTokenClassFeatureGenerator.java FeatureGeneratorUtil.java
Author: joern
Date: Mon May 16 13:31:16 2011
New Revision: 1103730
URL: http://svn.apache.org/viewvc?rev=1103730&view=rev
Log:
OPENNLP-172 Deprecated the fast token class feature generator; its tokenFeature implementation is now used as the default for token class feature generation.
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java?rev=1103730&r1=1103729&r2=1103730&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java Mon May 16 13:31:16 2011
@@ -24,7 +24,10 @@ import java.util.regex.Pattern;
/**
* Generates features for different for the class of the token.
+ *
+ * @deprecated Use {@link TokenClassFeatureGenerator} instead!
*/
+@Deprecated
public class FastTokenClassFeatureGenerator extends FeatureGeneratorAdapter {
private static final String TOKEN_CLASS_PREFIX = "wc";
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java?rev=1103730&r1=1103729&r2=1103730&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java Mon May 16 13:31:16 2011
@@ -18,40 +18,11 @@
package opennlp.tools.util.featuregen;
-import java.util.regex.Pattern;
-
/**
* This class provide common utilities for feature generation.
*/
public class FeatureGeneratorUtil {
- private static Pattern lowercase;
- private static Pattern twoDigits;
- private static Pattern fourDigits;
- private static Pattern containsNumber;
- private static Pattern containsLetter;
- private static Pattern containsHyphens;
- private static Pattern containsBackslash;
- private static Pattern containsComma;
- private static Pattern containsPeriod;
- private static Pattern allCaps;
- private static Pattern capPeriod;
- private static Pattern initialCap;
-
- static {
- lowercase = Pattern.compile("^[a-z]+$");
- twoDigits = Pattern.compile("^[0-9][0-9]$");
- fourDigits = Pattern.compile("^[0-9][0-9][0-9][0-9]$");
- containsNumber = Pattern.compile("[0-9]");
- containsLetter = Pattern.compile("[a-zA-Z]");
- containsHyphens = Pattern.compile("-");
- containsBackslash = Pattern.compile("/");
- containsComma = Pattern.compile(",");
- containsPeriod = Pattern.compile("\\.");
- allCaps = Pattern.compile("^[A-Z]+$");
- capPeriod = Pattern.compile("^[A-Z]\\.$");
- initialCap = Pattern.compile("^[A-Z]");
- }
/**
* Generates a class name for the specified token.
* The classes are as follows where the first matching class is used:
@@ -74,53 +45,6 @@ public class FeatureGeneratorUtil {
* @return The class name that the specified token belongs in.
*/
public static String tokenFeature(String token) {
-
- String feat;
- if (lowercase.matcher(token).find()) {
- feat = "lc";
- }
- else if (twoDigits.matcher(token).find()) {
- feat = "2d";
- }
- else if (fourDigits.matcher(token).find()) {
- feat = "4d";
- }
- else if (containsNumber.matcher(token).find()) {
- if (containsLetter.matcher(token).find()) {
- feat = "an";
- }
- else if (containsHyphens.matcher(token).find()) {
- feat = "dd";
- }
- else if (containsBackslash.matcher(token).find()) {
- feat = "ds";
- }
- else if (containsComma.matcher(token).find()) {
- feat = "dc";
- }
- else if (containsPeriod.matcher(token).find()) {
- feat = "dp";
- }
- else {
- feat = "num";
- }
- }
- else if (allCaps.matcher(token).find() && token.length() == 1) {
- feat = "sc";
- }
- else if (allCaps.matcher(token).find()) {
- feat = "ac";
- }
- else if (capPeriod.matcher(token).find()) {
- feat = "cp";
- }
- else if (initialCap.matcher(token).find()) {
- feat = "ic";
- }
- else {
- feat = "other";
- }
-
- return (feat);
+ return FastTokenClassFeatureGenerator.tokenFeature(token);
}
}