You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ra...@apache.org on 2015/03/09 14:49:19 UTC

svn commit: r1665237 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen: GeneratorFactory.java W2VClassesDictionary.java WordClusterFeatureGenerator.java

Author: ragerri
Date: Mon Mar  9 13:49:19 2015
New Revision: 1665237

URL: http://svn.apache.org/r1665237
Log:
OPENNLP-715 extending word cluster feature generator to also process Clark style clusters

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java?rev=1665237&r1=1665236&r2=1665237&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java Mon Mar  9 13:49:19 2015
@@ -274,7 +274,9 @@ public class GeneratorFactory {
   }
 
   /**
-   * @see DictionaryFeatureGenerator
+   * Defines a word cluster generator factory. It reads an element whose tag
+   * name is 'w2vwordcluster'; such clusters are typically produced by
+   * word2vec or Clark POS induction systems.
    */
   static class W2VClassesFeatureGeneratorFactory implements XmlFeatureGeneratorFactory {
 
@@ -290,7 +292,7 @@ public class GeneratorFactory {
         throw new InvalidFormatException("Not a W2VClassesDictionary resource for key: " + dictResourceKey);
       }
 
-      return new WordClusterFeatureGenerator((W2VClassesDictionary) dictResource);
+      return new WordClusterFeatureGenerator((W2VClassesDictionary) dictResource, dictResourceKey);
     }
 
     static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java?rev=1665237&r1=1665236&r2=1665237&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java Mon Mar  9 13:49:19 2015
@@ -50,6 +50,11 @@ public class W2VClassesDictionary implem
 
   private Map<String, String> tokenToClusterMap = new HashMap<String, String>();
 
+  /**
+   * Reads word2vec- and Clark-style clustering lexicons.
+   * @param in the input stream the lexicon is read from
+   * @throws IOException if reading from the input stream fails
+   */
   public W2VClassesDictionary(InputStream in) throws IOException {
 
     BufferedReader reader = new BufferedReader(new InputStreamReader(in, Charset.forName("UTF-8")));
@@ -57,8 +62,9 @@ public class W2VClassesDictionary implem
     String line;
     while ((line = reader.readLine()) != null) {
       String parts[] = line.split(" ");
-
-      if (parts.length == 2) {
+      if (parts.length == 3) {
+        tokenToClusterMap.put(parts[0], parts[1]);
+      } else if (parts.length == 2) {
         tokenToClusterMap.put(parts[0], parts[1]);
       }
     }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java?rev=1665237&r1=1665236&r2=1665237&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java Mon Mar  9 13:49:19 2015
@@ -22,9 +22,11 @@ import java.util.List;
 public class WordClusterFeatureGenerator extends FeatureGeneratorAdapter {
 
   private W2VClassesDictionary tokenDictionary;
+  private String resourceName;
 
-  public WordClusterFeatureGenerator(W2VClassesDictionary dict) {
+  public WordClusterFeatureGenerator(W2VClassesDictionary dict, String dictResourceKey) {
       tokenDictionary = dict;
+      resourceName = dictResourceKey;
   }
 
   public void createFeatures(List<String> features, String[] tokens, int index,
@@ -33,7 +35,7 @@ public class WordClusterFeatureGenerator
     String clusterId = tokenDictionary.lookupToken(tokens[index]);
 
     if (clusterId != null) {
-      features.add("cluster=" + clusterId);
+      features.add(resourceName + clusterId);
     }
   }
 }
\ No newline at end of file