You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2013/01/02 14:55:23 UTC
svn commit: r1427791 - in /mahout/trunk: ./ core/
core/src/main/java/org/apache/mahout/cf/taste/impl/common/
core/src/main/java/org/apache/mahout/common/iterator/
core/src/test/java/org/apache/mahout/cf/taste/impl/common/ math/
math/src/main/java/org/a...
Author: srowen
Date: Wed Jan 2 13:55:23 2013
New Revision: 1427791
URL: http://svn.apache.org/viewvc?rev=1427791&view=rev
Log:
Update to Commons Math 3.1 and fix sampling iterator test
Modified:
mahout/trunk/core/pom.xml
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java
mahout/trunk/math/pom.xml
mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java
mahout/trunk/pom.xml
Modified: mahout/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/core/pom.xml?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/core/pom.xml (original)
+++ mahout/trunk/core/pom.xml Wed Jan 2 13:55:23 2013
@@ -184,7 +184,7 @@
<dependency>
<groupId>org.apache.commons</groupId>
- <artifactId>commons-math</artifactId>
+ <artifactId>commons-math3</artifactId>
</dependency>
<dependency>
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java Wed Jan 2 13:55:23 2013
@@ -20,8 +20,7 @@ package org.apache.mahout.cf.taste.impl.
import java.util.NoSuchElementException;
import com.google.common.base.Preconditions;
-import org.apache.commons.math.MathException;
-import org.apache.commons.math.distribution.PascalDistributionImpl;
+import org.apache.commons.math3.distribution.PascalDistribution;
/**
* Wraps a {@link LongPrimitiveIterator} and returns only some subset of the elements that it would,
@@ -29,7 +28,7 @@ import org.apache.commons.math.distribut
*/
public final class SamplingLongPrimitiveIterator extends AbstractLongPrimitiveIterator {
- private final PascalDistributionImpl geometricDistribution;
+ private final PascalDistribution geometricDistribution;
private final LongPrimitiveIterator delegate;
private long next;
private boolean hasNext;
@@ -38,7 +37,7 @@ public final class SamplingLongPrimitive
Preconditions.checkNotNull(delegate);
Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0);
// Geometric distribution is special case of negative binomial (aka Pascal) with r=1:
- geometricDistribution = new PascalDistributionImpl(1, samplingRate);
+ geometricDistribution = new PascalDistribution(1, samplingRate);
this.delegate = delegate;
this.hasNext = true;
doNext();
@@ -68,12 +67,7 @@ public final class SamplingLongPrimitive
}
private void doNext() {
- int toSkip;
- try {
- toSkip = geometricDistribution.sample();
- } catch (MathException e) {
- throw new IllegalStateException(e);
- }
+ int toSkip = geometricDistribution.sample();
delegate.skip(toSkip);
if (delegate.hasNext()) {
next = delegate.next();
@@ -93,12 +87,8 @@ public final class SamplingLongPrimitive
@Override
public void skip(int n) {
int toSkip = 0;
- try {
- for (int i = 0; i < n; i++) {
- toSkip += geometricDistribution.sample();
- }
- } catch (MathException e) {
- throw new IllegalStateException(e);
+ for (int i = 0; i < n; i++) {
+ toSkip += geometricDistribution.sample();
}
delegate.skip(toSkip);
if (delegate.hasNext()) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterator.java Wed Jan 2 13:55:23 2013
@@ -21,8 +21,7 @@ import java.util.Iterator;
import com.google.common.base.Preconditions;
import com.google.common.collect.AbstractIterator;
-import org.apache.commons.math.MathException;
-import org.apache.commons.math.distribution.PascalDistributionImpl;
+import org.apache.commons.math3.distribution.PascalDistribution;
import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
/**
@@ -31,25 +30,20 @@ import org.apache.mahout.cf.taste.impl.c
*/
public final class SamplingIterator<T> extends AbstractIterator<T> {
- private final PascalDistributionImpl geometricDistribution;
+ private final PascalDistribution geometricDistribution;
private final Iterator<? extends T> delegate;
public SamplingIterator(Iterator<? extends T> delegate, double samplingRate) {
Preconditions.checkNotNull(delegate);
Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0);
// Geometric distribution is special case of negative binomial (aka Pascal) with r=1:
- geometricDistribution = new PascalDistributionImpl(1, samplingRate);
+ geometricDistribution = new PascalDistribution(1, samplingRate);
this.delegate = delegate;
}
@Override
protected T computeNext() {
- int toSkip;
- try {
- toSkip = geometricDistribution.sample();
- } catch (MathException e) {
- throw new IllegalStateException(e);
- }
+ int toSkip = geometricDistribution.sample();
if (delegate instanceof SkippingIterator<?>) {
SkippingIterator<? extends T> skippingDelegate = (SkippingIterator<? extends T>) delegate;
skippingDelegate.skip(toSkip);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIteratorTest.java Wed Jan 2 13:55:23 2013
@@ -62,9 +62,11 @@ public final class SamplingLongPrimitive
@Test
public void testSample() {
+ double p = 0.1;
+ int n = 1000;
+ double sd = Math.sqrt(n * p * (1.0 - p));
for (int i = 0; i < 1000; i++) {
- SamplingLongPrimitiveIterator t = new SamplingLongPrimitiveIterator(
- countingIterator(1000), 0.1);
+ SamplingLongPrimitiveIterator t = new SamplingLongPrimitiveIterator(countingIterator(n), p);
int k = 0;
while (t.hasNext()) {
long v = t.nextLong();
@@ -72,9 +74,9 @@ public final class SamplingLongPrimitive
assertTrue(v >= 0L);
assertTrue(v < 1000L);
}
- double sd = Math.sqrt(0.9 * 0.1 * 1000);
- assertTrue(k >= 100 - 4 * sd);
- assertTrue(k <= 100 + 4 * sd);
+ // Should be +/- 5 standard deviations except in about 1 out of 1.7M cases
+ assertTrue(k >= 100 - 5 * sd);
+ assertTrue(k <= 100 + 5 * sd);
}
}
Modified: mahout/trunk/math/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/math/pom.xml?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/pom.xml (original)
+++ mahout/trunk/math/pom.xml Wed Jan 2 13:55:23 2013
@@ -148,7 +148,7 @@
<!-- 3rd-party -->
<dependency>
<groupId>org.apache.commons</groupId>
- <artifactId>commons-math</artifactId>
+ <artifactId>commons-math3</artifactId>
</dependency>
<dependency>
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/random/PoissonSampler.java Wed Jan 2 13:55:23 2013
@@ -18,8 +18,7 @@
package org.apache.mahout.math.random;
import com.google.common.collect.Lists;
-import org.apache.commons.math.distribution.PoissonDistribution;
-import org.apache.commons.math.distribution.PoissonDistributionImpl;
+import org.apache.commons.math3.distribution.PoissonDistribution;
import org.apache.mahout.common.RandomUtils;
import java.util.List;
@@ -29,34 +28,37 @@ import java.util.Random;
* Samples from a Poisson distribution. Should probably not be used for lambda > 1000 or so.
*/
public final class PoissonSampler extends AbstractSamplerFunction {
- private double limit = 1;
- private Multinomial<Integer> partial;
- private final Random gen;
- private final PoissonDistribution pd;
-
- public PoissonSampler(double lambda) {
- gen = RandomUtils.getRandom();
- pd = new PoissonDistributionImpl(lambda);
- }
-
- @Override
- public Double sample() {
- return sample(gen.nextDouble());
- }
-
- double sample(double u) {
- if (u < limit) {
- List<WeightedThing<Integer>> steps = Lists.newArrayList();
- limit = 1;
- for (int i = 0; u / 20 < limit; i++) {
- double pdf = pd.probability(i);
- limit -= pdf;
- steps.add(new WeightedThing<Integer>(i, pdf));
- }
- steps.add(new WeightedThing<Integer>(steps.size(), limit));
- partial = new Multinomial<Integer>(steps);
- }
- return partial.sample(u);
+ private double limit;
+ private Multinomial<Integer> partial;
+ private final Random gen;
+ private final PoissonDistribution pd;
+
+ public PoissonSampler(double lambda) {
+ limit = 1;
+ gen = RandomUtils.getRandom();
+ pd = new PoissonDistribution(lambda);
+ }
+
+ @Override
+ public Double sample() {
+ return sample(gen.nextDouble());
+ }
+
+ double sample(double u) {
+ if (u < limit) {
+ List<WeightedThing<Integer>> steps = Lists.newArrayList();
+ limit = 1;
+ int i = 0;
+ while (u / 20 < limit) {
+ double pdf = pd.probability(i);
+ limit -= pdf;
+ steps.add(new WeightedThing<Integer>(i, pdf));
+ i++;
+ }
+ steps.add(new WeightedThing<Integer>(steps.size(), limit));
+ partial = new Multinomial<Integer>(steps);
}
+ return partial.sample(u);
+ }
}
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/EigenSolverWrapper.java Wed Jan 2 13:55:23 2013
@@ -16,10 +16,9 @@
*/
package org.apache.mahout.math.ssvd;
-import org.apache.commons.math.linear.Array2DRowRealMatrix;
-import org.apache.commons.math.linear.EigenDecomposition;
-import org.apache.commons.math.linear.EigenDecompositionImpl;
-import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math3.linear.Array2DRowRealMatrix;
+import org.apache.commons.math3.linear.EigenDecomposition;
+import org.apache.commons.math3.linear.RealMatrix;
/**
* wraps appropriate eigen solver for BBt matrix. Can be either colt or apache
@@ -44,8 +43,7 @@ public class EigenSolverWrapper {
public EigenSolverWrapper(double[][] bbt) {
int dim = bbt.length;
- EigenDecomposition evd2 = new EigenDecompositionImpl(
- new Array2DRowRealMatrix(bbt), 0);
+ EigenDecomposition evd2 = new EigenDecomposition(new Array2DRowRealMatrix(bbt));
eigenvalues = evd2.getRealEigenvalues();
RealMatrix uHatrm = evd2.getV();
uHat = new double[dim][];
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/DistributionChecks.java Wed Jan 2 13:55:23 2013
@@ -17,11 +17,9 @@
package org.apache.mahout.math.jet.random;
-import org.apache.commons.math.ConvergenceException;
-import org.apache.commons.math.FunctionEvaluationException;
-import org.apache.commons.math.analysis.UnivariateRealFunction;
-import org.apache.commons.math.analysis.integration.RombergIntegrator;
-import org.apache.commons.math.analysis.integration.UnivariateRealIntegrator;
+import org.apache.commons.math3.analysis.UnivariateFunction;
+import org.apache.commons.math3.analysis.integration.RombergIntegrator;
+import org.apache.commons.math3.analysis.integration.UnivariateIntegrator;
import org.junit.Assert;
import java.util.Arrays;
@@ -40,7 +38,7 @@ public final class DistributionChecks {
double[] x,
double offset,
double scale,
- int n) throws ConvergenceException, FunctionEvaluationException {
+ int n) {
double[] xs = Arrays.copyOf(x, x.length);
for (int i = 0; i < xs.length; i++) {
xs[i] = xs[i]*scale+ offset;
@@ -78,9 +76,9 @@ public final class DistributionChecks {
k[k.length - 1] = n - lastJ;
// now verify probabilities by comparing to integral of pdf
- UnivariateRealIntegrator integrator = new RombergIntegrator();
+ UnivariateIntegrator integrator = new RombergIntegrator();
for (int i = 0; i < xs.length - 1; i++) {
- double delta = integrator.integrate(new UnivariateRealFunction() {
+ double delta = integrator.integrate(1000000, new UnivariateFunction() {
@Override
public double value(double v) {
return dist.pdf(v);
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/random/NormalTest.java Wed Jan 2 13:55:23 2013
@@ -17,9 +17,7 @@
package org.apache.mahout.math.random;
-import org.apache.commons.math.MathException;
-import org.apache.commons.math.distribution.NormalDistribution;
-import org.apache.commons.math.distribution.NormalDistributionImpl;
+import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.MahoutTestCase;
import org.apache.mahout.math.stats.OnlineSummarizer;
@@ -29,35 +27,35 @@ import org.junit.Test;
import java.util.Arrays;
public class NormalTest extends MahoutTestCase {
- @Override
- @Before
- public void setUp() {
- RandomUtils.useTestSeed();
- }
-
- @Test
- public void testOffset() {
- OnlineSummarizer s = new OnlineSummarizer();
- Sampler<Double> sampler = new Normal(2, 5);
- for (int i = 0; i < 10001; i++) {
- s.add(sampler.sample());
- }
- assertEquals(String.format("m = %.3f, sd = %.3f", s.getMean(), s.getSD()), 2, s.getMean(), 0.04 * s.getSD());
- assertEquals(5, s.getSD(), 0.12);
+ @Override
+ @Before
+ public void setUp() {
+ RandomUtils.useTestSeed();
+ }
+
+ @Test
+ public void testOffset() {
+ OnlineSummarizer s = new OnlineSummarizer();
+ Sampler<Double> sampler = new Normal(2, 5);
+ for (int i = 0; i < 10001; i++) {
+ s.add(sampler.sample());
}
+ assertEquals(String.format("m = %.3f, sd = %.3f", s.getMean(), s.getSD()), 2, s.getMean(), 0.04 * s.getSD());
+ assertEquals(5, s.getSD(), 0.12);
+ }
+
+ @Test
+ public void testSample() throws Exception {
+ double[] data = new double[10001];
+ Sampler<Double> sampler = new Normal();
+ for (int i = 0; i < data.length; i++) {
+ data[i] = sampler.sample();
+ }
+ Arrays.sort(data);
- @Test
- public void testSample() throws MathException {
- double[] data = new double[10001];
- Sampler<Double> sampler = new Normal();
- for (int i = 0; i < 10001; i++) {
- data[i] = sampler.sample();
- }
- Arrays.sort(data);
-
- NormalDistribution reference = new NormalDistributionImpl();
+ NormalDistribution reference = new NormalDistribution();
- assertEquals("Median", reference.inverseCumulativeProbability(0.5), data[5000], 0.04);
- }
+ assertEquals("Median", reference.inverseCumulativeProbability(0.5), data[5000], 0.04);
+ }
}
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/random/PoissonSamplerTest.java Wed Jan 2 13:55:23 2013
@@ -17,36 +17,36 @@
package org.apache.mahout.math.random;
-import org.apache.commons.math.distribution.PoissonDistribution;
-import org.apache.commons.math.distribution.PoissonDistributionImpl;
+import org.apache.commons.math3.distribution.PoissonDistribution;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.MahoutTestCase;
import org.junit.Before;
import org.junit.Test;
public class PoissonSamplerTest extends MahoutTestCase {
- @Override
- @Before
- public void setUp() {
- RandomUtils.useTestSeed();
+
+ @Override
+ @Before
+ public void setUp() {
+ RandomUtils.useTestSeed();
+ }
+
+ @Test
+ public void testBasics() {
+ for (double alpha : new double[]{0.1, 1, 10, 100}) {
+ checkDistribution(new PoissonSampler(alpha), alpha);
}
+ }
- @Test
- public void testBasics() {
- for (double alpha : new double[]{0.1, 1, 10, 100}) {
- checkDistribution(new PoissonSampler(alpha), alpha);
- }
+ private static void checkDistribution(PoissonSampler pd, double alpha) {
+ int[] count = new int[(int) Math.max(10, 5 * alpha)];
+ for (int i = 0; i < 10000; i++) {
+ count[pd.sample().intValue()]++;
}
- private static void checkDistribution(PoissonSampler pd, double alpha) {
- int[] count = new int[(int) Math.max(10, 5 * alpha)];
- for (int i = 0; i < 10000; i++) {
- count[pd.sample().intValue()]++;
- }
-
- PoissonDistribution ref = new PoissonDistributionImpl(alpha);
- for (int i = 0; i < count.length; i++) {
- assertEquals(ref.probability(i), count[i] / 10000.0, 2.0e-2);
- }
+ PoissonDistribution ref = new PoissonDistribution(alpha);
+ for (int i = 0; i < count.length; i++) {
+ assertEquals(ref.probability(i), count[i] / 10000.0, 2.0e-2);
}
+ }
}
Modified: mahout/trunk/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/pom.xml?rev=1427791&r1=1427790&r2=1427791&view=diff
==============================================================================
--- mahout/trunk/pom.xml (original)
+++ mahout/trunk/pom.xml Wed Jan 2 13:55:23 2013
@@ -380,8 +380,8 @@
<dependency>
<groupId>org.apache.commons</groupId>
- <artifactId>commons-math</artifactId>
- <version>2.2</version>
+ <artifactId>commons-math3</artifactId>
+ <version>3.1</version>
</dependency>
<dependency>