You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/09/29 19:47:17 UTC

svn commit: r1002787 - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/ core/src/test/java/org/apache/mahout/clustering/ utils/src/main/java/org/apache/mahout/clustering/cdbw/

Author: jeastman
Date: Wed Sep 29 17:47:17 2010
New Revision: 1002787

URL: http://svn.apache.org/viewvc?rev=1002787&view=rev
Log:
MAHOUT-513
- Removed weighting from GaussianAccumulator.observe(). It is not needed for
CDbw and is problematic in OnlineGaussianAccumulator.  All tests run.

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/GaussianAccumulator.java Wed Sep 29 17:47:17 2010
@@ -30,15 +30,14 @@ public interface GaussianAccumulator {
   public abstract Vector getVariance();
 
   /**
-   * Observe the vector with the given weight
+   * Observe the vector 
    * 
    * @param x a Vector
-   * @param weight a double
    */
-  public abstract void observe(Vector x, double weight);
+  public abstract void observe(Vector x);
 
   /**
-   * Compute the mean and standard deviation
+   * Compute the mean, variance and standard deviation
    */
   public abstract void compute();
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/OnlineGaussianAccumulator.java Wed Sep 29 17:47:17 2010
@@ -49,21 +49,20 @@ public class OnlineGaussianAccumulator i
   }
 
   @Override
-  public void observe(Vector x, double weight) {
-    n += weight;
-    Vector weightedX = x.times(weight);
+  public void observe(Vector x) {
+    n++;
     Vector delta;
     if (mean != null) {
-      delta = weightedX.minus(mean);
+      delta = x.minus(mean);
     } else {
       mean = x.like();
-      delta = weightedX.clone();
+      delta = x.clone();
     }
     mean = mean.plus(delta.divide(n));
     if (M2 != null) {
-      M2 = M2.plus(delta.times(weightedX.minus(mean)));
+      M2 = M2.plus(delta.times(x.minus(mean)));
     } else {
-      M2 = delta.times(weightedX.minus(mean));
+      M2 = delta.times(x.minus(mean));
     }
     variance = M2.divide(n - 1);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/RunningSumsGaussianAccumulator.java Wed Sep 29 17:47:17 2010
@@ -65,15 +65,14 @@ public class RunningSumsGaussianAccumula
   }
 
   @Override
-  public void observe(Vector x, double weight) {
-    s0 += weight;
-    Vector weightedX = x.times(weight);
+  public void observe(Vector x) {
+    s0++;
     if (s1 == null) {
-      s1 = weightedX;
+      s1 = x.clone();
     } else {
-      weightedX.addTo(s1);
+      x.addTo(s1);
     }
-    Vector x2 = x.times(x).times(weight);
+    Vector x2 = x.times(x);
     if (s2 == null) {
       s2 = x2;
     } else {

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java Wed Sep 29 17:47:17 2010
@@ -104,8 +104,8 @@ public class TestGaussianAccumulators ex
     GaussianAccumulator accumulator0 = new RunningSumsGaussianAccumulator();
     GaussianAccumulator accumulator1 = new OnlineGaussianAccumulator();
     for (VectorWritable vw : sampleData) {
-      accumulator0.observe(vw.get(), 1);
-      accumulator1.observe(vw.get(), 1);
+      accumulator0.observe(vw.get());
+      accumulator1.observe(vw.get());
     }
     accumulator0.compute();
     accumulator1.compute();

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java?rev=1002787&r1=1002786&r2=1002787&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java Wed Sep 29 17:47:17 2010
@@ -136,7 +136,7 @@ public class CDbwEvaluator {
     List<VectorWritable> repPts = representativePoints.get(cI);
     GaussianAccumulator accumulator = new RunningSumsGaussianAccumulator();
     for (VectorWritable vw : repPts) {
-      accumulator.observe(vw.get(), 1);
+      accumulator.observe(vw.get());
     }
     accumulator.compute();
     double d = accumulator.getAverageStd();