You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ra...@apache.org on 2015/03/09 13:45:48 UTC

svn commit: r1665214 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen: PreviousTwoMapFeatureGenerator.java TrigramNameFeatureGenerator.java

Author: ragerri
Date: Mon Mar  9 12:45:47 2015
New Revision: 1665214

URL: http://svn.apache.org/r1665214
Log:
OPENNLP-716 adding local features that combine well with Brown clustering features

Added:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGenerator.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGenerator.java?rev=1665214&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGenerator.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGenerator.java Mon Mar  9 12:45:47 2015
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This {@link AdaptiveFeatureGenerator} generates a feature combining the outcomes last recorded for the current token and the token before it.
+ */
+public class PreviousTwoMapFeatureGenerator implements AdaptiveFeatureGenerator {
+
+  private Map<String, String> previousMap = new HashMap<String, String>();
+
+  /**
+   * Adds one "ppd=" feature pairing the previous map entries of tokens[index] and tokens[index - 1].
+   */
+  public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
+    // The first token has no predecessor, so nothing is added when index is 0.
+    if (index > 0) {
+      features.add("ppd=" + previousMap.get(tokens[index]) + "," + previousMap.get(tokens[index - 1]));
+    }
+  }
+
+  public void updateAdaptiveData(String[] tokens, String[] outcomes) {
+    // Store each token's outcome in the previous map; a repeated token keeps only its latest outcome.
+    for (int i = 0; i < tokens.length; i++) {
+      previousMap.put(tokens[i], outcomes[i]);
+    }
+  }
+
+  /**
+   * Clears the previous map.
+   */
+  public void clearAdaptiveData() {
+    previousMap.clear();
+  }
+}

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java?rev=1665214&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java Mon Mar  9 12:45:47 2015
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.List;
+
+import opennlp.tools.util.featuregen.FeatureGeneratorAdapter;
+
+/**
+ * Adds trigram features based on tokens and token classes.
+ * Covers both the three-token window ending at the current token and the one starting at it.
+ */
+public class TrigramNameFeatureGenerator extends FeatureGeneratorAdapter {
+
+  public void createFeatures(List<String> features, String[] tokens, int index,
+      String[] previousOutcomes) {
+    String wc = FeatureGeneratorUtil.tokenFeature(tokens[index]);
+    // window ending at the current token: requires two preceding tokens
+    if (index > 1) {
+      features.add("ppw,pw,w=" + tokens[index - 2] + "," + tokens[index - 1] + "," + tokens[index]);
+      String pwc = FeatureGeneratorUtil.tokenFeature(tokens[index - 1]);
+      String ppwc = FeatureGeneratorUtil.tokenFeature(tokens[index - 2]);
+      features.add("ppwc,pwc,wc=" + ppwc + "," + pwc + "," + wc);
+    }
+    if (index + 2 < tokens.length) { // window starting at the current token: requires two following tokens
+      features.add("w,nw,nnw=" + tokens[index] + "," + tokens[index + 1] + "," + tokens[index + 2]);
+      String nwc = FeatureGeneratorUtil.tokenFeature(tokens[index + 1]);
+      String nnwc = FeatureGeneratorUtil.tokenFeature(tokens[index + 2]);
+      features.add("wc,nwc,nnwc=" + wc + "," + nwc + "," + nnwc);
+    }
+  }
+}