You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ra...@apache.org on 2015/03/09 14:49:19 UTC
svn commit: r1665237 - in
/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen:
GeneratorFactory.java W2VClassesDictionary.java
WordClusterFeatureGenerator.java
Author: ragerri
Date: Mon Mar 9 13:49:19 2015
New Revision: 1665237
URL: http://svn.apache.org/r1665237
Log:
OPENNLP-715 extending word cluster feature generator to also process Clark style clusters
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java?rev=1665237&r1=1665236&r2=1665237&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java Mon Mar 9 13:49:19 2015
@@ -274,7 +274,9 @@ public class GeneratorFactory {
}
/**
- * @see DictionaryFeatureGenerator
+ * Defines a word cluster feature generator factory; it reads an element with
+ * 'w2vwordcluster' as its tag name. These clusters are typically produced by
+ * word2vec or Clark POS induction systems.
*/
static class W2VClassesFeatureGeneratorFactory implements XmlFeatureGeneratorFactory {
@@ -290,7 +292,7 @@ public class GeneratorFactory {
throw new InvalidFormatException("Not a W2VClassesDictionary resource for key: " + dictResourceKey);
}
- return new WordClusterFeatureGenerator((W2VClassesDictionary) dictResource);
+ return new WordClusterFeatureGenerator((W2VClassesDictionary) dictResource, dictResourceKey);
}
static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java?rev=1665237&r1=1665236&r2=1665237&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/W2VClassesDictionary.java Mon Mar 9 13:49:19 2015
@@ -50,6 +50,11 @@ public class W2VClassesDictionary implem
private Map<String, String> tokenToClusterMap = new HashMap<String, String>();
+ /**
+ * Reads word2vec and Clark style clustering lexicons.
+ * @param in the input stream to read the lexicon from
+ * @throws IOException if the lexicon cannot be read from the stream
+ */
public W2VClassesDictionary(InputStream in) throws IOException {
BufferedReader reader = new BufferedReader(new InputStreamReader(in, Charset.forName("UTF-8")));
@@ -57,8 +62,9 @@ public class W2VClassesDictionary implem
String line;
while ((line = reader.readLine()) != null) {
String parts[] = line.split(" ");
-
- if (parts.length == 2) {
+ if (parts.length == 3) {
+ tokenToClusterMap.put(parts[0], parts[1]);
+ } else if (parts.length == 2) {
tokenToClusterMap.put(parts[0], parts[1]);
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java?rev=1665237&r1=1665236&r2=1665237&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java Mon Mar 9 13:49:19 2015
@@ -22,9 +22,11 @@ import java.util.List;
public class WordClusterFeatureGenerator extends FeatureGeneratorAdapter {
private W2VClassesDictionary tokenDictionary;
+ private String resourceName;
- public WordClusterFeatureGenerator(W2VClassesDictionary dict) {
+ public WordClusterFeatureGenerator(W2VClassesDictionary dict, String dictResourceKey) {
tokenDictionary = dict;
+ resourceName = dictResourceKey;
}
public void createFeatures(List<String> features, String[] tokens, int index,
@@ -33,7 +35,7 @@ public class WordClusterFeatureGenerator
String clusterId = tokenDictionary.lookupToken(tokens[index]);
if (clusterId != null) {
- features.add("cluster=" + clusterId);
+ features.add(resourceName + clusterId);
}
}
}
\ No newline at end of file