You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by td...@apache.org on 2010/09/17 21:51:49 UTC
svn commit: r998289 - in /mahout/trunk/core/src/main/java/org/apache/mahout/vectors: FeatureVectorEncoder.java TextValueEncoder.java
Author: tdunning
Date: Fri Sep 17 19:51:49 2010
New Revision: 998289
URL: http://svn.apache.org/viewvc?rev=998289&view=rev
Log:
Made text flush. Will change this again, likely.
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java?rev=998289&r1=998288&r2=998289&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java Fri Sep 17 19:51:49 2010
@@ -276,4 +276,8 @@ public abstract class FeatureVectorEncod
protected byte[] bytesForString(String x){
return x.getBytes(Charsets.UTF_8);
}
+
+ public void flush(double weight, Vector data) {
+ // default is to do nothing
+ }
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java?rev=998289&r1=998288&r2=998289&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java Fri Sep 17 19:51:49 2010
@@ -54,7 +54,6 @@ public class TextValueEncoder extends Fe
*/
@Override
public void addToVector(byte[] originalForm, double weight, Vector data) {
- Multiset<String> counts = HashMultiset.create();
for (String word : tokenize(new String(originalForm))) {
counts.add(word);
}
@@ -65,6 +64,7 @@ public class TextValueEncoder extends Fe
* @param weight
* @param data
*/
+ @Override
public void flush(double weight, Vector data) {
for (String word : counts.elementSet()) {
// weight words by log_2(tf) times whatever other weight we are given