You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/09/29 19:47:17 UTC
svn commit: r1002787 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/clustering/
core/src/test/java/org/apache/mahout/clustering/
utils/src/main/java/org/apache/mahout/clustering/cdbw/
Author: jeastman
Date: Wed Sep 29 17:47:17 2010
New Revision: 1002787
URL: http://svn.apache.org/viewvc?rev=1002787&view=rev
Log:
MAHOUT-513
- Removed the weight parameter from GaussianAccumulator.observe(). Weighting is not needed for
CDbw and is problematic in the OnlineGaussianAccumulator. All tests run.
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java Wed Sep 29 17:47:17 2010
@@ -30,15 +30,14 @@ public interface GaussianAccumulator {
public abstract Vector getVariance();
/**
- * Observe the vector with the given weight
+ * Observe the vector
*
* @param x a Vector
- * @param weight a double
*/
- public abstract void observe(Vector x, double weight);
+ public abstract void observe(Vector x);
/**
- * Compute the mean and standard deviation
+ * Compute the mean, variance and standard deviation
*/
public abstract void compute();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java Wed Sep 29 17:47:17 2010
@@ -49,21 +49,20 @@ public class OnlineGaussianAccumulator i
}
@Override
- public void observe(Vector x, double weight) {
- n += weight;
- Vector weightedX = x.times(weight);
+ public void observe(Vector x) {
+ n++;
Vector delta;
if (mean != null) {
- delta = weightedX.minus(mean);
+ delta = x.minus(mean);
} else {
mean = x.like();
- delta = weightedX.clone();
+ delta = x.clone();
}
mean = mean.plus(delta.divide(n));
if (M2 != null) {
- M2 = M2.plus(delta.times(weightedX.minus(mean)));
+ M2 = M2.plus(delta.times(x.minus(mean)));
} else {
- M2 = delta.times(weightedX.minus(mean));
+ M2 = delta.times(x.minus(mean));
}
variance = M2.divide(n - 1);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java Wed Sep 29 17:47:17 2010
@@ -65,15 +65,14 @@ public class RunningSumsGaussianAccumula
}
@Override
- public void observe(Vector x, double weight) {
- s0 += weight;
- Vector weightedX = x.times(weight);
+ public void observe(Vector x) {
+ s0++;
if (s1 == null) {
- s1 = weightedX;
+ s1 = x.clone();
} else {
- weightedX.addTo(s1);
+ x.addTo(s1);
}
- Vector x2 = x.times(x).times(weight);
+ Vector x2 = x.times(x);
if (s2 == null) {
s2 = x2;
} else {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java Wed Sep 29 17:47:17 2010
@@ -104,8 +104,8 @@ public class TestGaussianAccumulators ex
GaussianAccumulator accumulator0 = new RunningSumsGaussianAccumulator();
GaussianAccumulator accumulator1 = new OnlineGaussianAccumulator();
for (VectorWritable vw : sampleData) {
- accumulator0.observe(vw.get(), 1);
- accumulator1.observe(vw.get(), 1);
+ accumulator0.observe(vw.get());
+ accumulator1.observe(vw.get());
}
accumulator0.compute();
accumulator1.compute();
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java Wed Sep 29 17:47:17 2010
@@ -136,7 +136,7 @@ public class CDbwEvaluator {
List<VectorWritable> repPts = representativePoints.get(cI);
GaussianAccumulator accumulator = new RunningSumsGaussianAccumulator();
for (VectorWritable vw : repPts) {
- accumulator.observe(vw.get(), 1);
+ accumulator.observe(vw.get());
}
accumulator.compute();
double d = accumulator.getAverageStd();