You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2013/01/02 14:55:23 UTC

svn commit: r1427791 - in /mahout/trunk: ./ core/ core/src/main/java/org/apache/mahout/cf/taste/impl/common/ core/src/main/java/org/apache/mahout/common/iterator/ core/src/test/java/org/apache/mahout/cf/taste/impl/common/ math/ math/src/main/java/org/a...

Author: srowen
Date: Wed Jan  2 13:55:23 2013
New Revision: 1427791

URL: http://svn.apache.org/viewvc?rev=1427791&view=rev
Log:
Update to Commons Math 3.1 and fix sampling iterator test

Modified:
    mahout/trunk/core/pom.xml
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java
    mahout/trunk/math/pom.xml
    mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java
    mahout/trunk/pom.xml

Modified: mahout/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/core/pom.xml?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/core/pom.xml (original)
+++ mahout/trunk/core/pom.xml Wed Jan  2 13:55:23 2013
@@ -184,7 +184,7 @@
 
     <dependency>
       <groupId>org.apache.commons</groupId>
-      <artifactId>commons-math</artifactId>
+      <artifactId>commons-math3</artifactId>
     </dependency>
 
     <dependency>

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java Wed Jan  2 13:55:23 2013
@@ -20,8 +20,7 @@ package org.apache.mahout.cf.taste.impl.
 import java.util.NoSuchElementException;
 
 import com.google.common.base.Preconditions;
-import org.apache.commons.math.MathException;
-import org.apache.commons.math.distribution.PascalDistributionImpl;
+import org.apache.commons.math3.distribution.PascalDistribution;
 
 /**
  * Wraps a {@link LongPrimitiveIterator} and returns only some subset of the elements that it would,
@@ -29,7 +28,7 @@ import org.apache.commons.math.distribut
  */
 public final class SamplingLongPrimitiveIterator extends AbstractLongPrimitiveIterator {
   
-  private final PascalDistributionImpl geometricDistribution;
+  private final PascalDistribution geometricDistribution;
   private final LongPrimitiveIterator delegate;
   private long next;
   private boolean hasNext;
@@ -38,7 +37,7 @@ public final class SamplingLongPrimitive
     Preconditions.checkNotNull(delegate);
     Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0);
     // Geometric distribution is special case of negative binomial (aka Pascal) with r=1:
-    geometricDistribution = new PascalDistributionImpl(1, samplingRate);
+    geometricDistribution = new PascalDistribution(1, samplingRate);
     this.delegate = delegate;
     this.hasNext = true;
     doNext();
@@ -68,12 +67,7 @@ public final class SamplingLongPrimitive
   }
   
   private void doNext() {
-    int toSkip;
-    try {
-      toSkip = geometricDistribution.sample();
-    } catch (MathException e) {
-      throw new IllegalStateException(e);
-    }
+    int toSkip = geometricDistribution.sample();
     delegate.skip(toSkip);
     if (delegate.hasNext()) {
       next = delegate.next();
@@ -93,12 +87,8 @@ public final class SamplingLongPrimitive
   @Override
   public void skip(int n) {
     int toSkip = 0;
-    try {
-      for (int i = 0; i < n; i++) {
-        toSkip += geometricDistribution.sample();
-      }
-    } catch (MathException e) {
-      throw new IllegalStateException(e);
+    for (int i = 0; i < n; i++) {
+      toSkip += geometricDistribution.sample();
     }
     delegate.skip(toSkip);
     if (delegate.hasNext()) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java Wed Jan  2 13:55:23 2013
@@ -21,8 +21,7 @@ import java.util.Iterator;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.AbstractIterator;
-import org.apache.commons.math.MathException;
-import org.apache.commons.math.distribution.PascalDistributionImpl;
+import org.apache.commons.math3.distribution.PascalDistribution;
 import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
 
 /**
@@ -31,25 +30,20 @@ import org.apache.mahout.cf.taste.impl.c
  */
 public final class SamplingIterator<T> extends AbstractIterator<T> {
   
-  private final PascalDistributionImpl geometricDistribution;
+  private final PascalDistribution geometricDistribution;
   private final Iterator<? extends T> delegate;
 
   public SamplingIterator(Iterator<? extends T> delegate, double samplingRate) {
     Preconditions.checkNotNull(delegate);
     Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0);
     // Geometric distribution is special case of negative binomial (aka Pascal) with r=1:
-    geometricDistribution = new PascalDistributionImpl(1, samplingRate);
+    geometricDistribution = new PascalDistribution(1, samplingRate);
     this.delegate = delegate;
   }
 
   @Override
   protected T computeNext() {
-    int toSkip;
-    try {
-      toSkip = geometricDistribution.sample();
-    } catch (MathException e) {
-      throw new IllegalStateException(e);
-    }
+    int toSkip = geometricDistribution.sample();
     if (delegate instanceof SkippingIterator<?>) {
       SkippingIterator<? extends T> skippingDelegate = (SkippingIterator<? extends T>) delegate;
       skippingDelegate.skip(toSkip);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java Wed Jan  2 13:55:23 2013
@@ -62,9 +62,11 @@ public final class SamplingLongPrimitive
 
   @Test
   public void testSample() {
+    double p = 0.1;
+    int n = 1000;
+    double sd = Math.sqrt(n * p * (1.0 - p));
     for (int i = 0; i < 1000; i++) {
-      SamplingLongPrimitiveIterator t = new SamplingLongPrimitiveIterator(
-          countingIterator(1000), 0.1);
+      SamplingLongPrimitiveIterator t = new SamplingLongPrimitiveIterator(countingIterator(n), p);
       int k = 0;
       while (t.hasNext()) {
         long v = t.nextLong();
@@ -72,9 +74,9 @@ public final class SamplingLongPrimitive
         assertTrue(v >= 0L);
         assertTrue(v < 1000L);
       }
-      double sd = Math.sqrt(0.9 * 0.1 * 1000);
-      assertTrue(k >= 100 - 4 * sd);
-      assertTrue(k <= 100 + 4 * sd);
+      // Should be +/- 5 standard deviations except in about 1 out of 1.7M cases
+      assertTrue(k >= 100 - 5 * sd);
+      assertTrue(k <= 100 + 5 * sd);
     }
   }
 

Modified: mahout/trunk/math/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/math/pom.xml?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/pom.xml (original)
+++ mahout/trunk/math/pom.xml Wed Jan  2 13:55:23 2013
@@ -148,7 +148,7 @@
     <!--  3rd-party -->
     <dependency>
       <groupId>org.apache.commons</groupId>
-      <artifactId>commons-math</artifactId>
+      <artifactId>commons-math3</artifactId>
     </dependency>
 
     <dependency>

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java Wed Jan  2 13:55:23 2013
@@ -18,8 +18,7 @@
 package org.apache.mahout.math.random;
 
 import com.google.common.collect.Lists;
-import org.apache.commons.math.distribution.PoissonDistribution;
-import org.apache.commons.math.distribution.PoissonDistributionImpl;
+import org.apache.commons.math3.distribution.PoissonDistribution;
 import org.apache.mahout.common.RandomUtils;
 
 import java.util.List;
@@ -29,34 +28,37 @@ import java.util.Random;
  * Samples from a Poisson distribution.  Should probably not be used for lambda > 1000 or so.
  */
 public final class PoissonSampler extends AbstractSamplerFunction {
-    private double limit = 1;
 
-    private Multinomial<Integer> partial;
-    private final Random gen;
-    private final PoissonDistribution pd;
-
-    public PoissonSampler(double lambda) {
-        gen = RandomUtils.getRandom();
-        pd = new PoissonDistributionImpl(lambda);
-    }
-
-    @Override
-    public Double sample() {
-        return sample(gen.nextDouble());
-    }
-
-    double sample(double u) {
-        if (u < limit) {
-            List<WeightedThing<Integer>> steps = Lists.newArrayList();
-            limit = 1;
-            for (int i = 0; u / 20 < limit; i++) {
-                double pdf = pd.probability(i);
-                limit -= pdf;
-                steps.add(new WeightedThing<Integer>(i, pdf));
-            }
-            steps.add(new WeightedThing<Integer>(steps.size(), limit));
-            partial = new Multinomial<Integer>(steps);
-        }
-        return partial.sample(u);
+  private double limit;
+  private Multinomial<Integer> partial;
+  private final Random gen;
+  private final PoissonDistribution pd;
+
+  public PoissonSampler(double lambda) {
+    limit = 1;
+    gen = RandomUtils.getRandom();
+    pd = new PoissonDistribution(lambda);
+  }
+
+  @Override
+  public Double sample() {
+    return sample(gen.nextDouble());
+  }
+
+  double sample(double u) {
+    if (u < limit) {
+      List<WeightedThing<Integer>> steps = Lists.newArrayList();
+      limit = 1;
+      int i = 0;
+      while (u / 20 < limit) {
+        double pdf = pd.probability(i);
+        limit -= pdf;
+        steps.add(new WeightedThing<Integer>(i, pdf));
+        i++;
+      }
+      steps.add(new WeightedThing<Integer>(steps.size(), limit));
+      partial = new Multinomial<Integer>(steps);
     }
+    return partial.sample(u);
+  }
 }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java Wed Jan  2 13:55:23 2013
@@ -16,10 +16,9 @@
  */
 package org.apache.mahout.math.ssvd;
 
-import org.apache.commons.math.linear.Array2DRowRealMatrix;
-import org.apache.commons.math.linear.EigenDecomposition;
-import org.apache.commons.math.linear.EigenDecompositionImpl;
-import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math3.linear.Array2DRowRealMatrix;
+import org.apache.commons.math3.linear.EigenDecomposition;
+import org.apache.commons.math3.linear.RealMatrix;
 
 /**
  * wraps appropriate eigen solver for BBt matrix. Can be either colt or apache
@@ -44,8 +43,7 @@ public class EigenSolverWrapper {
 
   public EigenSolverWrapper(double[][] bbt) {
     int dim = bbt.length;
-    EigenDecomposition evd2 = new EigenDecompositionImpl(
-        new Array2DRowRealMatrix(bbt), 0);
+    EigenDecomposition evd2 = new EigenDecomposition(new Array2DRowRealMatrix(bbt));
     eigenvalues = evd2.getRealEigenvalues();
     RealMatrix uHatrm = evd2.getV();
     uHat = new double[dim][];

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java Wed Jan  2 13:55:23 2013
@@ -17,11 +17,9 @@
 
 package org.apache.mahout.math.jet.random;
 
-import org.apache.commons.math.ConvergenceException;
-import org.apache.commons.math.FunctionEvaluationException;
-import org.apache.commons.math.analysis.UnivariateRealFunction;
-import org.apache.commons.math.analysis.integration.RombergIntegrator;
-import org.apache.commons.math.analysis.integration.UnivariateRealIntegrator;
+import org.apache.commons.math3.analysis.UnivariateFunction;
+import org.apache.commons.math3.analysis.integration.RombergIntegrator;
+import org.apache.commons.math3.analysis.integration.UnivariateIntegrator;
 import org.junit.Assert;
 
 import java.util.Arrays;
@@ -40,7 +38,7 @@ public final class DistributionChecks {
                                        double[] x,
                                        double offset,
                                        double scale,
-                                       int n) throws ConvergenceException, FunctionEvaluationException {
+                                       int n) {
     double[] xs = Arrays.copyOf(x, x.length);
     for (int i = 0; i < xs.length; i++) {
       xs[i] = xs[i]*scale+ offset;
@@ -78,9 +76,9 @@ public final class DistributionChecks {
     k[k.length - 1] = n - lastJ;
 
     // now verify probabilities by comparing to integral of pdf
-    UnivariateRealIntegrator integrator = new RombergIntegrator();
+    UnivariateIntegrator integrator = new RombergIntegrator();
     for (int i = 0; i < xs.length - 1; i++) {
-      double delta = integrator.integrate(new UnivariateRealFunction() {
+      double delta = integrator.integrate(1000000, new UnivariateFunction() {
         @Override
         public double value(double v) {
           return dist.pdf(v);

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java Wed Jan  2 13:55:23 2013
@@ -17,9 +17,7 @@
 
 package org.apache.mahout.math.random;
 
-import org.apache.commons.math.MathException;
-import org.apache.commons.math.distribution.NormalDistribution;
-import org.apache.commons.math.distribution.NormalDistributionImpl;
+import org.apache.commons.math3.distribution.NormalDistribution;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.MahoutTestCase;
 import org.apache.mahout.math.stats.OnlineSummarizer;
@@ -29,35 +27,35 @@ import org.junit.Test;
 import java.util.Arrays;
 
 public class NormalTest extends MahoutTestCase {
-    @Override
-    @Before
-    public void setUp() {
-        RandomUtils.useTestSeed();
-    }
-
-    @Test
-    public void testOffset() {
-        OnlineSummarizer s = new OnlineSummarizer();
-        Sampler<Double> sampler = new Normal(2, 5);
-        for (int i = 0; i < 10001; i++) {
-            s.add(sampler.sample());
-        }
 
-        assertEquals(String.format("m = %.3f, sd = %.3f", s.getMean(), s.getSD()), 2, s.getMean(), 0.04 * s.getSD());
-        assertEquals(5, s.getSD(), 0.12);
+  @Override
+  @Before
+  public void setUp() {
+    RandomUtils.useTestSeed();
+  }
+
+  @Test
+  public void testOffset() {
+    OnlineSummarizer s = new OnlineSummarizer();
+    Sampler<Double> sampler = new Normal(2, 5);
+    for (int i = 0; i < 10001; i++) {
+      s.add(sampler.sample());
     }
+    assertEquals(String.format("m = %.3f, sd = %.3f", s.getMean(), s.getSD()), 2, s.getMean(), 0.04 * s.getSD());
+    assertEquals(5, s.getSD(), 0.12);
+  }
+
+  @Test
+  public void testSample() throws Exception {
+    double[] data = new double[10001];
+    Sampler<Double> sampler = new Normal();
+    for (int i = 0; i < data.length; i++) {
+      data[i] = sampler.sample();
+    }
+    Arrays.sort(data);
 
-    @Test
-    public void testSample() throws MathException {
-        double[] data = new double[10001];
-        Sampler<Double> sampler = new Normal();
-        for (int i = 0; i < 10001; i++) {
-            data[i] = sampler.sample();
-        }
-        Arrays.sort(data);
-
-        NormalDistribution reference = new NormalDistributionImpl();
+    NormalDistribution reference = new NormalDistribution();
 
-        assertEquals("Median", reference.inverseCumulativeProbability(0.5), data[5000], 0.04);
-    }
+    assertEquals("Median", reference.inverseCumulativeProbability(0.5), data[5000], 0.04);
+  }
 }

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java Wed Jan  2 13:55:23 2013
@@ -17,36 +17,36 @@
 
 package org.apache.mahout.math.random;
 
-import org.apache.commons.math.distribution.PoissonDistribution;
-import org.apache.commons.math.distribution.PoissonDistributionImpl;
+import org.apache.commons.math3.distribution.PoissonDistribution;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.MahoutTestCase;
 import org.junit.Before;
 import org.junit.Test;
 
 public class PoissonSamplerTest extends MahoutTestCase {
-    @Override
-    @Before
-    public void setUp() {
-        RandomUtils.useTestSeed();
+
+  @Override
+  @Before
+  public void setUp() {
+    RandomUtils.useTestSeed();
+  }
+
+  @Test
+  public void testBasics() {
+    for (double alpha : new double[]{0.1, 1, 10, 100}) {
+      checkDistribution(new PoissonSampler(alpha), alpha);
     }
+  }
 
-    @Test
-    public void testBasics() {
-        for (double alpha : new double[]{0.1, 1, 10, 100}) {
-            checkDistribution(new PoissonSampler(alpha), alpha);
-        }
+  private static void checkDistribution(PoissonSampler pd, double alpha) {
+    int[] count = new int[(int) Math.max(10, 5 * alpha)];
+    for (int i = 0; i < 10000; i++) {
+      count[pd.sample().intValue()]++;
     }
 
-    private static void checkDistribution(PoissonSampler pd, double alpha) {
-        int[] count = new int[(int) Math.max(10, 5 * alpha)];
-        for (int i = 0; i < 10000; i++) {
-            count[pd.sample().intValue()]++;
-        }
-
-        PoissonDistribution ref = new PoissonDistributionImpl(alpha);
-        for (int i = 0; i < count.length; i++) {
-            assertEquals(ref.probability(i), count[i] / 10000.0, 2.0e-2);
-        }
+    PoissonDistribution ref = new PoissonDistribution(alpha);
+    for (int i = 0; i < count.length; i++) {
+      assertEquals(ref.probability(i), count[i] / 10000.0, 2.0e-2);
     }
+  }
 }

Modified: mahout/trunk/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/pom.xml?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/pom.xml (original)
+++ mahout/trunk/pom.xml Wed Jan  2 13:55:23 2013
@@ -380,8 +380,8 @@
       
       <dependency>
         <groupId>org.apache.commons</groupId>
-        <artifactId>commons-math</artifactId>
-        <version>2.2</version>
+        <artifactId>commons-math3</artifactId>
+        <version>3.1</version>
       </dependency>
       
       <dependency>