You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@commons.apache.org by oe...@apache.org on 2015/08/24 21:35:40 UTC

[1/2] [math] MATH-1220: random generator based on rejection-inversion sampling for Zipf distributions

Repository: commons-math
Updated Branches:
  refs/heads/MATH_3_X 32c5f8612 -> 484196fc2
  refs/heads/master 1c194a0dc -> 9c51e5316


MATH-1220: random generator based on rejection-inversion sampling for
Zipf distributions

Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/9c51e531
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/9c51e531
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/9c51e531

Branch: refs/heads/master
Commit: 9c51e5316babbd370bc32aed0fee134216726ec9
Parents: 1c194a0
Author: Otmar Ertl <ot...@gmail.com>
Authored: Mon Aug 24 20:16:07 2015 +0200
Committer: Otmar Ertl <ot...@gmail.com>
Committed: Mon Aug 24 20:59:04 2015 +0200

----------------------------------------------------------------------
 src/changes/changes.xml                         |   3 +
 .../math4/distribution/ZipfDistribution.java    | 248 +++++++++++--------
 .../distribution/ZipfDistributionTest.java      | 122 ++-------
 3 files changed, 162 insertions(+), 211 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-math/blob/9c51e531/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index afdaa0c..511faed 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -54,6 +54,9 @@ If the output is not quite correct, check for invisible trailing spaces!
     </release>
 
     <release version="4.0" date="XXXX-XX-XX" description="">
+      <action dev="oertl" type="update" issue="MATH-1220">
+        Faster generation of Zipf distributed random numbers by using rejection-inversion sampling.
+      </action>
       <action dev="oertl" type="update" issue="MATH-990">
         Improved performance of sort-in-place methods by avoiding boxing.
       </action>

http://git-wip-us.apache.org/repos/asf/commons-math/blob/9c51e531/src/main/java/org/apache/commons/math4/distribution/ZipfDistribution.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/distribution/ZipfDistribution.java b/src/main/java/org/apache/commons/math4/distribution/ZipfDistribution.java
index be445cb..e44b4b5 100644
--- a/src/main/java/org/apache/commons/math4/distribution/ZipfDistribution.java
+++ b/src/main/java/org/apache/commons/math4/distribution/ZipfDistribution.java
@@ -46,7 +46,7 @@ public class ZipfDistribution extends AbstractIntegerDistribution {
     /** Whether or not the numerical variance has been calculated */
     private boolean numericalVarianceIsCalculated = false;
     /** The sampler to be used for the sample() method */
-    private transient ZipfRejectionSampler sampler;
+    private transient ZipfRejectionInversionSampler sampler;
 
     /**
      * Create a new Zipf distribution with the given number of elements and
@@ -271,123 +271,195 @@ public class ZipfDistribution extends AbstractIntegerDistribution {
         return true;
     }
 
-    /**
-     * {@inheritDoc}
-     * <p>
-     * An instrumental distribution g(k) is used to generate random values by
-     * rejection sampling. g(k) is defined as g(1):= 1 and g(k) := I(-s,k-1/2,k+1/2)
-     * for k larger than 1, where s denotes the exponent of the Zipf distribution
-     * and I(r,a,b) is the integral of x^r for x from a to b.
-     * <p>
-     * Since 1^x^s is a convex function, Jensens's inequality gives
-     * I(-s,k-1/2,k+1/2) >= 1/k^s for all positive k and non-negative s.
-     * In order to limit the rejection rate for large exponents s,
-     * the instrumental distribution weight is differently defined for value 1.
-     */
     @Override
     public int sample() {
         if (sampler == null) {
-            sampler = new ZipfRejectionSampler(numberOfElements, exponent);
+            sampler = new ZipfRejectionInversionSampler(numberOfElements, exponent);
         }
         return sampler.sample(random);
     }
 
     /**
-     * Utility class implementing a rejection sampling method for a discrete,
-     * bounded Zipf distribution.
+     * Utility class implementing a rejection inversion sampling method for a discrete,
+     * bounded Zipf distribution that is based on the method described in
+     * <p>
+     * Wolfgang Hörmann and Gerhard Derflinger
+     * "Rejection-inversion to generate variates from monotone discrete distributions."
+     * ACM Transactions on Modeling and Computer Simulation (TOMACS) 6.3 (1996): 169-184.
+     * <p>
+     * The paper describes an algorithm for exponents larger than 1 (Algorithm ZRI).
+     * The original method uses {@code H(x) := (v + x)^(1 - q) / (1 - q)}
+     * as the integral of the hat function. This function is undefined for
+     * q = 1, which is the reason for the limitation of the exponent.
+     * If instead the integral function
+     * {@code H(x) := ((v + x)^(1 - q) - 1) / (1 - q)} is used,
+     * for which a meaningful limit exists for q = 1,
+     * the method works for all positive exponents.
+     * <p>
+     * The following implementation uses v := 0 and generates integral numbers
+     * in the range [1, numberOfElements]. This is different to the original method
+     * where v is defined to be positive and numbers are taken from [0, i_max].
+     * This explains why the implementation looks slightly different.
      *
      * @since 3.6
      */
-    static final class ZipfRejectionSampler {
+    static final class ZipfRejectionInversionSampler {
 
-        /** Number of elements. */
-        private final int numberOfElements;
         /** Exponent parameter of the distribution. */
         private final double exponent;
-        /** Cached tail weight of instrumental distribution used for rejection sampling */
-        private double instrumentalDistributionTailWeight = Double.NaN;
-
-        /**
-         * Simple constructor.
-         * @param numberOfElements number of elements
-         * @param exponent exponent parameter of the distribution
-         */
-        ZipfRejectionSampler(final int numberOfElements, final double exponent) {
-            this.numberOfElements = numberOfElements;
+        /** Number of elements. */
+        private final int numberOfElements;
+        /** Constant equal to {@code hIntegral(1.5) - 1}. */
+        private final double hIntegralX1;
+        /** Constant equal to {@code hIntegral(numberOfElements + 0.5)}. */
+        private final double hIntegralNumberOfElements;
+        /** Constant equal to {@code 2 - hIntegralInverse(hIntegral(2.5) - h(2)}. */
+        private final double s;
+
+        ZipfRejectionInversionSampler(final int numberOfElements, final double exponent) {
             this.exponent = exponent;
+            this.numberOfElements = numberOfElements;
+            this.hIntegralX1 = hIntegral(1.5) - 1d;
+            this.hIntegralNumberOfElements = hIntegral(numberOfElements + 0.5);
+            this.s = 2d - hIntegralInverse(hIntegral(2.5) - h(2));
         }
 
-        /** Generate a random value sampled from this distribution.
-         * @param random random generator to use
-         * @return random value sampled from this distribution
-         */
         int sample(final RandomGenerator random) {
-            if (Double.isNaN(instrumentalDistributionTailWeight)) {
-                instrumentalDistributionTailWeight = integratePowerFunction(-exponent, 1.5, numberOfElements+0.5);
-            }
-
             while(true) {
-                final double randomValue = random.nextDouble()*(instrumentalDistributionTailWeight + 1.);
-                if (randomValue < instrumentalDistributionTailWeight) {
-                    final double q = randomValue / instrumentalDistributionTailWeight;
-                    final int sample = sampleFromInstrumentalDistributionTail(q);
-                    if (random.nextDouble() < acceptanceRateForTailSample(sample)) {
-                        return sample;
-                    }
+
+                final double u = hIntegralNumberOfElements + random.nextDouble() * (hIntegralX1 - hIntegralNumberOfElements);
+                // u is uniformly distributed in (hIntegralX1, hIntegralNumberOfElements]
+
+                double x = hIntegralInverse(u);
+
+                int k = (int)(x + 0.5);
+
+                // Limit k to the range [1, numberOfElements]
+                // (k could be outside due to numerical inaccuracies)
+                if (k < 1) {
+                    k = 1;
                 }
-                else {
-                    return 1;
+                else if (k > numberOfElements) {
+                    k = numberOfElements;
+                }
+
+                // Here, the distribution of k is given by:
+                //
+                //   P(k = 1) = C * (hIntegral(1.5) - hIntegralX1) = C
+                //   P(k = m) = C * (hIntegral(m + 1/2) - hIntegral(m - 1/2)) for m >= 2
+                //
+                //   where C := 1 / (hIntegralNumberOfElements - hIntegralX1)
+
+                if (k - x <= s || u >= hIntegral(k + 0.5) - h(k)) {
+
+                    // Case k = 1:
+                    //
+                    //   The right inequality is always true, because replacing k by 1 gives
+                    //   u >= hIntegral(1.5) - h(1) = hIntegralX1 and u is taken from
+                    //   (hIntegralX1, hIntegralNumberOfElements].
+                    //
+                    //   Therefore, the acceptance rate for k = 1 is P(accepted | k = 1) = 1
+                    //   and the probability that 1 is returned as random value is
+                    //   P(k = 1 and accepted) = P(accepted | k = 1) * P(k = 1) = C = C / 1^exponent
+                    //
+                    // Case k >= 2:
+                    //
+                    //   The left inequality (k - x <= s) is just a short cut
+                    //   to avoid the more expensive evaluation of the right inequality
+                    //   (u >= hIntegral(k + 0.5) - h(k)) in many cases.
+                    //
+                    //   If the left inequality is true, the right inequality is also true:
+                    //     Theorem 2 in the paper is valid for all positive exponents, because
+                    //     the requirements h'(x) = -exponent/x^(exponent + 1) < 0 and
+                    //     (-1/hInverse'(x))'' = (1+1/exponent) * x^(1/exponent-1) >= 0
+                    //     are both fulfilled.
+                    //     Therefore, f(x) := x - hIntegralInverse(hIntegral(x + 0.5) - h(x))
+                    //     is a non-decreasing function. If k - x <= s holds,
+                    //     k - x <= s + f(k) - f(2) is obviously also true which is equivalent to
+                    //     -x <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)),
+                    //     -hIntegralInverse(u) <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)),
+                    //     and finally u >= hIntegral(k + 0.5) - h(k).
+                    //
+                    //   Hence, the right inequality determines the acceptance rate:
+                    //   P(accepted | k = m) = h(m) / (hIntegrated(m+1/2) - hIntegrated(m-1/2))
+                    //   The probability that m is returned is given by
+                    //   P(k = m and accepted) = P(accepted | k = m) * P(k = m) = C * h(m) = C / m^exponent.
+                    //
+                    // In both cases the probabilities are proportional to the probability mass function
+                    // of the Zipf distribution.
+
+                    return k;
                 }
             }
         }
 
         /**
-         * Returns a sample from the instrumental distribution tail for a given
-         * uniformly distributed random value.
+         * {@code H(x) :=}
+         * <ul>
+         * <li>{@code (x^(1-exponent) - 1)/(1 - exponent)}, if {@code exponent != 1}</li>
+         * <li>{@code log(x)}, if {@code exponent == 1}</li>
+         * </ul>
+         * H(x) is an integral function of h(x),
+         * the derivative of H(x) is h(x).
+         *
+         * @param x free parameter
+         * @return {@code H(x)}
+         */
+        private double hIntegral(final double x) {
+            final double logX = FastMath.log(x);
+            return helper2((1d-exponent)*logX)*logX;
+        }
+
+        /**
+         * {@code h(x) := 1/x^exponent}
          *
-         * @param q a uniformly distributed random value taken from [0,1]
-         * @return a sample in the range [2, {@link #numberOfElements}]
+         * @param x free parameter
+         * @return h(x)
          */
-        int sampleFromInstrumentalDistributionTail(double q) {
-            final double a = 1.5;
-            final double b = numberOfElements + 0.5;
-            final double logBdviA = FastMath.log(b / a);
-
-            final int result  = (int) (a * FastMath.exp(logBdviA * helper1(q, logBdviA * (1. - exponent))) + 0.5);
-            if (result < 2) {
-                return 2;
-            }
-            if (result > numberOfElements) {
-                return numberOfElements;
+        private double h(final double x) {
+            return FastMath.exp(-exponent * FastMath.log(x));
+        }
+
+        /**
+         * The inverse function of H(x).
+         *
+         * @param x free parameter
+         * @return y for which {@code H(y) = x}
+         */
+        private double hIntegralInverse(final double x) {
+            double t = x*(1d-exponent);
+            if (t < -1d) {
+                // Limit value to the range [-1, +inf).
+                // t could be smaller than -1 in some rare cases due to numerical errors.
+                t = -1;
             }
-            return result;
+            return FastMath.exp(helper1(t)*x);
         }
 
         /**
-         * Helper function that calculates log((1-q)+q*exp(x))/x.
+         * Helper function that calculates {@code log(1+x)/x}.
          * <p>
          * A Taylor series expansion is used, if x is close to 0.
          *
-         * @param q a value in the range [0,1]
-         * @param x free parameter
-         * @return log((1-q)+q*exp(x))/x
+         * @param x a value larger than or equal to -1
+         * @return {@code log(1+x)/x}
          */
-        static double helper1(final double q, final double x) {
-            if (Math.abs(x) > 1e-8) {
-                return FastMath.log((1.-q)+q*FastMath.exp(x))/x;
+        static double helper1(final double x) {
+            if (FastMath.abs(x)>1e-8) {
+                return FastMath.log1p(x)/x;
             }
             else {
-                return q*(1.+(1./2.)*x*(1.-q)*(1+(1./3.)*x*((1.-2.*q) + (1./4.)*x*(6*q*q*(q-1)+1))));
+                return 1.-x*((1./2.)-x*((1./3.)-x*(1./4.)));
             }
         }
 
         /**
-         * Helper function to calculate (exp(x)-1)/x.
+         * Helper function to calculate {@code (exp(x)-1)/x}.
          * <p>
          * A Taylor series expansion is used, if x is close to 0.
          *
          * @param x free parameter
-         * @return (exp(x)-1)/x if x is non-zero, 1 if x=0
+         * @return {@code (exp(x)-1)/x} if x is non-zero, or 1 if x=0
          */
         static double helper2(final double x) {
             if (FastMath.abs(x)>1e-8) {
@@ -397,35 +469,5 @@ public class ZipfDistribution extends AbstractIntegerDistribution {
                 return 1.+x*(1./2.)*(1.+x*(1./3.)*(1.+x*(1./4.)));
             }
         }
-
-        /**
-         * Integrates the power function x^r from x=a to b.
-         *
-         * @param r the exponent
-         * @param a the integral lower bound
-         * @param b the integral upper bound
-         * @return the calculated integral value
-         */
-        static double integratePowerFunction(final double r, final double a, final double b) {
-            final double logA = FastMath.log(a);
-            final double logBdivA = FastMath.log(b/a);
-            return FastMath.exp((1.+r)*logA)*helper2((1.+r)*logBdivA)*logBdivA;
-
-        }
-
-        /**
-         * Calculates the acceptance rate for a sample taken from the tail of the instrumental distribution.
-         * <p>
-         * The acceptance rate is given by the ratio k^(-s)/I(-s,k-0.5, k+0.5)
-         * where I(r,a,b) is the integral of x^r for x from a to b.
-         *
-         * @param k the value which has been sampled using the instrumental distribution
-         * @return the acceptance rate
-         */
-        double acceptanceRateForTailSample(int k) {
-            final double a = FastMath.log1p(1./(2.*k-1.));
-            final double b = FastMath.log1p(2./(2.*k-1.));
-            return FastMath.exp((1.-exponent)*a)/(k*b*helper2((1.-exponent)*b));
-        }
     }
 }

http://git-wip-us.apache.org/repos/asf/commons-math/blob/9c51e531/src/test/java/org/apache/commons/math4/distribution/ZipfDistributionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math4/distribution/ZipfDistributionTest.java b/src/test/java/org/apache/commons/math4/distribution/ZipfDistributionTest.java
index 44bad94..eeae81f 100644
--- a/src/test/java/org/apache/commons/math4/distribution/ZipfDistributionTest.java
+++ b/src/test/java/org/apache/commons/math4/distribution/ZipfDistributionTest.java
@@ -17,15 +17,8 @@
 
 package org.apache.commons.math4.distribution;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
 import org.apache.commons.math4.TestUtils;
-import org.apache.commons.math4.analysis.UnivariateFunction;
-import org.apache.commons.math4.analysis.integration.SimpsonIntegrator;
-import org.apache.commons.math4.distribution.IntegerDistribution;
-import org.apache.commons.math4.distribution.ZipfDistribution;
-import org.apache.commons.math4.distribution.ZipfDistribution.ZipfRejectionSampler;
+import org.apache.commons.math4.distribution.ZipfDistribution.ZipfRejectionInversionSampler;
 import org.apache.commons.math4.exception.NotStrictlyPositiveException;
 import org.apache.commons.math4.random.AbstractRandomGenerator;
 import org.apache.commons.math4.random.RandomGenerator;
@@ -143,9 +136,9 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
             2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100
         };
         double[] exponentValues = {
-            1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
+            1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 2e-1, 5e-1,
             1. - 1e-9, 1.0, 1. + 1e-9, 1.1, 1.2, 1.3, 1.5, 1.6, 1.7, 1.8, 2.0,
-            2.5, 3.0, 4., 5., 6., 7., 8., 9., 10., 20., 30.
+            2.5, 3.0, 4., 5., 6., 7., 8., 9., 10., 20., 30., 100., 150.
         };
 
         for (int numPoints : numPointsValues) {
@@ -175,110 +168,23 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
     }
 
     @Test
-    public void testSamplerIntegratePowerFunction() {
-        final double tol = 1e-6;
-        final double[] exponents = {
-            -1e-5, -1e-4, -1e-3, -1e-2, -1e-1, -1e0, -1e1
-        };
-        final double[] limits = {
-            0.5, 1., 1.5, 2., 2.5, 3., 3.5, 4., 4.5, 5., 5.5, 6.0, 6.5, 7.0,
-            7.5, 8.0, 8.5, 9.0, 9.5, 10.0
-        };
-
-        for (final double exponent : exponents) {
-            for (int lowerLimitIndex = 0; lowerLimitIndex < limits.length; ++lowerLimitIndex) {
-                final double lowerLimit = limits[lowerLimitIndex];
-                for (int upperLimitIndex = lowerLimitIndex+1; upperLimitIndex < limits.length; ++upperLimitIndex) {
-                    final double upperLimit = limits[upperLimitIndex];
-                    final double result1 = new SimpsonIntegrator().integrate(10000, new UnivariateFunction() {
-                        @Override
-                        public double value(double x) {
-                            return Math.pow(x, exponent);
-                        }
-                    }, lowerLimit, upperLimit);
-
-                    final double result2 =
-                            ZipfRejectionSampler.integratePowerFunction(exponent, lowerLimit, upperLimit);
-                    assertEquals(result1, result2, (result1+result2)*tol);
-                }
-            }
-        }
-    }
-
-    @Test
-    public void testSamplerAcceptanceRate() {
+    public void testSamplerHelper1() {
         final double tol = 1e-12;
-        final double[] exponents = {
-            1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 2e0, 5e0, 1e1, 1e2, 1e3
-        };
-        final int[] values = {
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
+        final double[] testValues = {
+            Math.nextUp(-1.), -1e-1, -1e-2, -1e-3, -1e-4, -1e-5, -1e-6, -1e-7, -1e-8,
+            -1e-9, -1e-10, -1e-11, 0., 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6,
+            1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0
         };
-        final int numberOfElements = 1000;
-        for (final double exponent : exponents) {
-            ZipfRejectionSampler sampler = new ZipfRejectionSampler(numberOfElements, exponent);
-            for (final int value : values) {
-                double expected = FastMath.pow(value, -exponent);
-                double result = sampler.acceptanceRateForTailSample(value) *
-                                ZipfRejectionSampler.integratePowerFunction(-exponent, value - 0.5, value + 0.5);
-                TestUtils.assertRelativelyEquals(expected, result, tol);
-                assertTrue(result <= 1.); // test Jensen's inequality
-            }
+        for (final double testValue : testValues) {
+            final double expected = FastMath.log1p(testValue);
+            TestUtils.assertRelativelyEquals(expected, ZipfRejectionInversionSampler.helper1(testValue)*testValue, tol);
         }
     }
 
-    @Test
-    public void testSamplerInverseInstrumentalDistribution() {
-        final double tol = 1e-14;
-        final double[] exponentValues = {
-            1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 2E0, 3e0, 4e0, 5e0, 6., 7., 8., 9., 10., 50.
-        };
-        final double[] qValues = {
-            0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0
-        };
-        final int[] numberOfElementsValues = {
-            2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100
-        };
-
-        for (final double exponent : exponentValues) {
-            for (final int numberOfElements : numberOfElementsValues) {
-                final ZipfRejectionSampler sampler = new ZipfRejectionSampler(numberOfElements, exponent);
-                for (final double q : qValues) {
-                    int result = sampler.sampleFromInstrumentalDistributionTail(q);
-                    double total =
-                            ZipfRejectionSampler.integratePowerFunction(-exponent, 1.5, numberOfElements + 0.5);
-                    double lowerBound =
-                            ZipfRejectionSampler.integratePowerFunction(-exponent, 1.5, result - 0.5) / total;
-                    double upperBound =
-                            ZipfRejectionSampler.integratePowerFunction(-exponent, 1.5, result + 0.5) / total;
-                    assertTrue(lowerBound <= q*(1.+tol));
-                    assertTrue(upperBound >= q*(1.-tol));
-                }
-            }
-        }
-    }
 
     @Test
-    public void testSamplerHelper1() {
-        final double tol = 1e-14;
-        final double[] qValues = {
-            0., 1e-12, 1e-11, 1e-10, 1e-9, 9e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4,
-            1e-3, 1e-2, 1e-1, 1e0
-        };
-        final double[] xValues = {
-            -Double.MAX_VALUE, -1e10, -1e9, -1e8, -1e7, -1e6, -1e5, -1e4, -1e3,
-            -1e2, -1e1, -1e0, -1e-1, -1e-2, -1e-3, -1e-4, -1e-5, -1e-6, -1e-7,
-            -1e-8, -1e-9, -1e-10, -Double.MIN_VALUE, 0.0, Double.MIN_VALUE,
-            1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0,
-            1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, Double.MAX_VALUE
-        };
-
-        for (final double q : qValues) {
-            for(final double x : xValues) {
-                double calculated = ZipfRejectionSampler.helper1(q, x);
-                TestUtils.assertRelativelyEquals((1.-q)+q*Math.exp(x), FastMath.exp(calculated*x), tol);
-            }
-        }
+    public void testSamplerHelper1Minus1() {
+        Assert.assertEquals(Double.POSITIVE_INFINITY, ZipfRejectionInversionSampler.helper1(-1d), 0d);
     }
 
     @Test
@@ -291,7 +197,7 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
         };
         for (double testValue : testValues) {
             final double expected = FastMath.expm1(testValue);
-            TestUtils.assertRelativelyEquals(expected, ZipfRejectionSampler.helper2(testValue)*testValue, tol);
+            TestUtils.assertRelativelyEquals(expected, ZipfRejectionInversionSampler.helper2(testValue)*testValue, tol);
         }
     }

[2/2] [math] MATH-1220: random generator based on rejection-inversion sampling for Zipf distributions

Posted by oe...@apache.org.

MATH-1220: random generator based on rejection-inversion sampling for
Zipf distributions

Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/484196fc
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/484196fc
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/484196fc

Branch: refs/heads/MATH_3_X
Commit: 484196fc2e11b632964895b6662dc783d4538b35
Parents: 32c5f86
Author: Otmar Ertl <ot...@gmail.com>
Authored: Mon Aug 24 20:16:07 2015 +0200
Committer: Otmar Ertl <ot...@gmail.com>
Committed: Mon Aug 24 21:30:56 2015 +0200

----------------------------------------------------------------------
 src/changes/changes.xml                         |   3 +
 .../math3/distribution/ZipfDistribution.java    | 251 +++++++++++--------
 .../distribution/ZipfDistributionTest.java      | 123 ++-------
 3 files changed, 164 insertions(+), 213 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-math/blob/484196fc/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 63b38eb..b3998be 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -51,6 +51,9 @@ If the output is not quite correct, check for invisible trailing spaces!
   </properties>
   <body>
     <release version="3.6" date="XXXX-XX-XX" description="">
+      <action dev="oertl" type="update" issue="MATH-1220">
+        Faster generation of Zipf distributed random numbers by using rejection-inversion sampling.
+      </action>
       <action dev="oertl" type="update" issue="MATH-990">
         Improved performance of sort-in-place methods by avoiding boxing.
       </action>

http://git-wip-us.apache.org/repos/asf/commons-math/blob/484196fc/src/main/java/org/apache/commons/math3/distribution/ZipfDistribution.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math3/distribution/ZipfDistribution.java b/src/main/java/org/apache/commons/math3/distribution/ZipfDistribution.java
index 3161293..bd43d35 100644
--- a/src/main/java/org/apache/commons/math3/distribution/ZipfDistribution.java
+++ b/src/main/java/org/apache/commons/math3/distribution/ZipfDistribution.java
@@ -44,7 +44,7 @@ public class ZipfDistribution extends AbstractIntegerDistribution {
     /** Whether or not the numerical variance has been calculated */
     private boolean numericalVarianceIsCalculated = false;
     /** The sampler to be used for the sample() method */
-    private transient ZipfRejectionSampler sampler;
+    private transient ZipfRejectionInversionSampler sampler;
 
     /**
      * Create a new Zipf distribution with the given number of elements and
@@ -123,7 +123,6 @@ public class ZipfDistribution extends AbstractIntegerDistribution {
     }
 
     /** {@inheritDoc} */
-    @Override
     public double logProbability(int x) {
         if (x <= 0 || x > numberOfElements) {
             return Double.NEGATIVE_INFINITY;
@@ -261,123 +260,194 @@ public class ZipfDistribution extends AbstractIntegerDistribution {
         return true;
     }
 
-    /**
-     * {@inheritDoc}
-     * <p>
-     * An instrumental distribution g(k) is used to generate random values by
-     * rejection sampling. g(k) is defined as g(1):= 1 and g(k) := I(-s,k-1/2,k+1/2)
-     * for k larger than 1, where s denotes the exponent of the Zipf distribution
-     * and I(r,a,b) is the integral of x^r for x from a to b.
-     * <p>
-     * Since 1^x^s is a convex function, Jensens's inequality gives
-     * I(-s,k-1/2,k+1/2) >= 1/k^s for all positive k and non-negative s.
-     * In order to limit the rejection rate for large exponents s,
-     * the instrumental distribution weight is differently defined for value 1.
-     */
-    @Override
     public int sample() {
         if (sampler == null) {
-            sampler = new ZipfRejectionSampler(numberOfElements, exponent);
+            sampler = new ZipfRejectionInversionSampler(numberOfElements, exponent);
         }
         return sampler.sample(random);
     }
 
     /**
-     * Utility class implementing a rejection sampling method for a discrete,
-     * bounded Zipf distribution.
+     * Utility class implementing a rejection inversion sampling method for a discrete,
+     * bounded Zipf distribution that is based on the method described in
+     * <p>
+     * Wolfgang Hörmann and Gerhard Derflinger
+     * "Rejection-inversion to generate variates from monotone discrete distributions."
+     * ACM Transactions on Modeling and Computer Simulation (TOMACS) 6.3 (1996): 169-184.
+     * <p>
+     * The paper describes an algorithm for exponents larger than 1 (Algorithm ZRI).
+     * The original method uses {@code H(x) := (v + x)^(1 - q) / (1 - q)}
+     * as the integral of the hat function. This function is undefined for
+     * q = 1, which is the reason for the limitation of the exponent.
+     * If instead the integral function
+     * {@code H(x) := ((v + x)^(1 - q) - 1) / (1 - q)} is used,
+     * for which a meaningful limit exists for q = 1,
+     * the method works for all positive exponents.
+     * <p>
+     * The following implementation uses v := 0 and generates integral numbers
+     * in the range [1, numberOfElements]. This is different to the original method
+     * where v is defined to be positive and numbers are taken from [0, i_max].
+     * This explains why the implementation looks slightly different.
      *
      * @since 3.6
      */
-    static final class ZipfRejectionSampler {
+    static final class ZipfRejectionInversionSampler {
 
-        /** Number of elements. */
-        private final int numberOfElements;
         /** Exponent parameter of the distribution. */
         private final double exponent;
-        /** Cached tail weight of instrumental distribution used for rejection sampling */
-        private double instrumentalDistributionTailWeight = Double.NaN;
-
-        /**
-         * Simple constructor.
-         * @param numberOfElements number of elements
-         * @param exponent exponent parameter of the distribution
-         */
-        ZipfRejectionSampler(final int numberOfElements, final double exponent) {
-            this.numberOfElements = numberOfElements;
+        /** Number of elements. */
+        private final int numberOfElements;
+        /** Constant equal to {@code hIntegral(1.5) - 1}. */
+        private final double hIntegralX1;
+        /** Constant equal to {@code hIntegral(numberOfElements + 0.5)}. */
+        private final double hIntegralNumberOfElements;
+        /** Constant equal to {@code 2 - hIntegralInverse(hIntegral(2.5) - h(2)}. */
+        private final double s;
+
+        ZipfRejectionInversionSampler(final int numberOfElements, final double exponent) {
             this.exponent = exponent;
+            this.numberOfElements = numberOfElements;
+            this.hIntegralX1 = hIntegral(1.5) - 1d;
+            this.hIntegralNumberOfElements = hIntegral(numberOfElements + 0.5);
+            this.s = 2d - hIntegralInverse(hIntegral(2.5) - h(2));
         }
 
-        /** Generate a random value sampled from this distribution.
-         * @param random random generator to use
-         * @return random value sampled from this distribution
-         */
         int sample(final RandomGenerator random) {
-            if (Double.isNaN(instrumentalDistributionTailWeight)) {
-                instrumentalDistributionTailWeight = integratePowerFunction(-exponent, 1.5, numberOfElements+0.5);
-            }
-
             while(true) {
-                final double randomValue = random.nextDouble()*(instrumentalDistributionTailWeight + 1.);
-                if (randomValue < instrumentalDistributionTailWeight) {
-                    final double q = randomValue / instrumentalDistributionTailWeight;
-                    final int sample = sampleFromInstrumentalDistributionTail(q);
-                    if (random.nextDouble() < acceptanceRateForTailSample(sample)) {
-                        return sample;
-                    }
+
+                final double u = hIntegralNumberOfElements + random.nextDouble() * (hIntegralX1 - hIntegralNumberOfElements);
+                // u is uniformly distributed in (hIntegralX1, hIntegralNumberOfElements]
+
+                double x = hIntegralInverse(u);
+
+                int k = (int)(x + 0.5);
+
+                // Limit k to the range [1, numberOfElements]
+                // (k could be outside due to numerical inaccuracies)
+                if (k < 1) {
+                    k = 1;
                 }
-                else {
-                    return 1;
+                else if (k > numberOfElements) {
+                    k = numberOfElements;
+                }
+
+                // Here, the distribution of k is given by:
+                //
+                //   P(k = 1) = C * (hIntegral(1.5) - hIntegralX1) = C
+                //   P(k = m) = C * (hIntegral(m + 1/2) - hIntegral(m - 1/2)) for m >= 2
+                //
+                //   where C := 1 / (hIntegralNumberOfElements - hIntegralX1)
+
+                if (k - x <= s || u >= hIntegral(k + 0.5) - h(k)) {
+
+                    // Case k = 1:
+                    //
+                    //   The right inequality is always true, because replacing k by 1 gives
+                    //   u >= hIntegral(1.5) - h(1) = hIntegralX1 and u is taken from
+                    //   (hIntegralX1, hIntegralNumberOfElements].
+                    //
+                    //   Therefore, the acceptance rate for k = 1 is P(accepted | k = 1) = 1
+                    //   and the probability that 1 is returned as random value is
+                    //   P(k = 1 and accepted) = P(accepted | k = 1) * P(k = 1) = C = C / 1^exponent
+                    //
+                    // Case k >= 2:
+                    //
+                    //   The left inequality (k - x <= s) is just a short cut
+                    //   to avoid the more expensive evaluation of the right inequality
+                    //   (u >= hIntegral(k + 0.5) - h(k)) in many cases.
+                    //
+                    //   If the left inequality is true, the right inequality is also true:
+                    //     Theorem 2 in the paper is valid for all positive exponents, because
+                    //     the requirements h'(x) = -exponent/x^(exponent + 1) < 0 and
+                    //     (-1/hInverse'(x))'' = (1+1/exponent) * x^(1/exponent-1) >= 0
+                    //     are both fulfilled.
+                    //     Therefore, f(x) := x - hIntegralInverse(hIntegral(x + 0.5) - h(x))
+                    //     is a non-decreasing function. If k - x <= s holds,
+                    //     k - x <= s + f(k) - f(2) is obviously also true which is equivalent to
+                    //     -x <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)),
+                    //     -hIntegralInverse(u) <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)),
+                    //     and finally u >= hIntegral(k + 0.5) - h(k).
+                    //
+                    //   Hence, the right inequality determines the acceptance rate:
+                    //   P(accepted | k = m) = h(m) / (hIntegrated(m+1/2) - hIntegrated(m-1/2))
+                    //   The probability that m is returned is given by
+                    //   P(k = m and accepted) = P(accepted | k = m) * P(k = m) = C * h(m) = C / m^exponent.
+                    //
+                    // In both cases the probabilities are proportional to the probability mass function
+                    // of the Zipf distribution.
+
+                    return k;
                 }
             }
         }
 
         /**
-         * Returns a sample from the instrumental distribution tail for a given
-         * uniformly distributed random value.
+         * {@code H(x) :=}
+         * <ul>
+         * <li>{@code (x^(1-exponent) - 1)/(1 - exponent)}, if {@code exponent != 1}</li>
+         * <li>{@code log(x)}, if {@code exponent == 1}</li>
+         * </ul>
+         * H(x) is an integral function of h(x),
+         * the derivative of H(x) is h(x).
          *
-         * @param q a uniformly distributed random value taken from [0,1]
-         * @return a sample in the range [2, {@link #numberOfElements}]
+         * @param x free parameter
+         * @return {@code H(x)}
          */
-        int sampleFromInstrumentalDistributionTail(double q) {
-            final double a = 1.5;
-            final double b = numberOfElements + 0.5;
-            final double logBdviA = FastMath.log(b / a);
-
-            final int result  = (int) (a * FastMath.exp(logBdviA * helper1(q, logBdviA * (1. - exponent))) + 0.5);
-            if (result < 2) {
-                return 2;
-            }
-            if (result > numberOfElements) {
-                return numberOfElements;
+        private double hIntegral(final double x) {
+            final double logX = FastMath.log(x);
+            return helper2((1d-exponent)*logX)*logX;
+        }
+
+        /**
+         * {@code h(x) := 1/x^exponent}
+         *
+         * @param x free parameter
+         * @return h(x)
+         */
+        private double h(final double x) {
+            return FastMath.exp(-exponent * FastMath.log(x));
+        }
+
+        /**
+         * The inverse function of H(x).
+         *
+         * @param x free parameter
+         * @return y for which {@code H(y) = x}
+         */
+        private double hIntegralInverse(final double x) {
+            double t = x*(1d-exponent);
+            if (t < -1d) {
+                // Limit value to the range [-1, +inf).
+                // t could be smaller than -1 in some rare cases due to numerical errors.
+                t = -1;
             }
-            return result;
+            return FastMath.exp(helper1(t)*x);
         }
 
         /**
-         * Helper function that calculates log((1-q)+q*exp(x))/x.
+         * Helper function that calculates {@code log(1+x)/x}.
          * <p>
          * A Taylor series expansion is used, if x is close to 0.
          *
-         * @param q a value in the range [0,1]
-         * @param x free parameter
-         * @return log((1-q)+q*exp(x))/x
+         * @param x a value larger than or equal to -1
+         * @return {@code log(1+x)/x}
          */
-        static double helper1(final double q, final double x) {
-            if (Math.abs(x) > 1e-8) {
-                return FastMath.log((1.-q)+q*FastMath.exp(x))/x;
+        static double helper1(final double x) {
+            if (FastMath.abs(x)>1e-8) {
+                return FastMath.log1p(x)/x;
             }
             else {
-                return q*(1.+(1./2.)*x*(1.-q)*(1+(1./3.)*x*((1.-2.*q) + (1./4.)*x*(6*q*q*(q-1)+1))));
+                return 1.-x*((1./2.)-x*((1./3.)-x*(1./4.)));
             }
         }
 
         /**
-         * Helper function to calculate (exp(x)-1)/x.
+         * Helper function to calculate {@code (exp(x)-1)/x}.
          * <p>
          * A Taylor series expansion is used, if x is close to 0.
          *
          * @param x free parameter
-         * @return (exp(x)-1)/x if x is non-zero, 1 if x=0
+         * @return {@code (exp(x)-1)/x} if x is non-zero, or 1 if x=0
          */
         static double helper2(final double x) {
             if (FastMath.abs(x)>1e-8) {
@@ -387,36 +457,5 @@ public class ZipfDistribution extends AbstractIntegerDistribution {
                 return 1.+x*(1./2.)*(1.+x*(1./3.)*(1.+x*(1./4.)));
             }
         }
-
-        /**
-         * Integrates the power function x^r from x=a to b.
-         *
-         * @param r the exponent
-         * @param a the integral lower bound
-         * @param b the integral upper bound
-         * @return the calculated integral value
-         */
-        static double integratePowerFunction(final double r, final double a, final double b) {
-            final double logA = FastMath.log(a);
-            final double logBdivA = FastMath.log(b/a);
-            return FastMath.exp((1.+r)*logA)*helper2((1.+r)*logBdivA)*logBdivA;
-
-        }
-
-        /**
-         * Calculates the acceptance rate for a sample taken from the tail of the instrumental distribution.
-         * <p>
-         * The acceptance rate is given by the ratio k^(-s)/I(-s,k-0.5, k+0.5)
-         * where I(r,a,b) is the integral of x^r for x from a to b.
-         *
-         * @param k the value which has been sampled using the instrumental distribution
-         * @return the acceptance rate
-         */
-        double acceptanceRateForTailSample(int k) {
-            final double a = FastMath.log1p(1./(2.*k-1.));
-            final double b = FastMath.log1p(2./(2.*k-1.));
-            return FastMath.exp((1.-exponent)*a)/(k*b*helper2((1.-exponent)*b));
-        }
     }
-
 }

http://git-wip-us.apache.org/repos/asf/commons-math/blob/484196fc/src/test/java/org/apache/commons/math3/distribution/ZipfDistributionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math3/distribution/ZipfDistributionTest.java b/src/test/java/org/apache/commons/math3/distribution/ZipfDistributionTest.java
index 3c177ef..93c180e 100644
--- a/src/test/java/org/apache/commons/math3/distribution/ZipfDistributionTest.java
+++ b/src/test/java/org/apache/commons/math3/distribution/ZipfDistributionTest.java
@@ -17,13 +17,8 @@
 
 package org.apache.commons.math3.distribution;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
 import org.apache.commons.math3.TestUtils;
-import org.apache.commons.math3.analysis.UnivariateFunction;
-import org.apache.commons.math3.analysis.integration.SimpsonIntegrator;
-import org.apache.commons.math3.distribution.ZipfDistribution.ZipfRejectionSampler;
+import org.apache.commons.math3.distribution.ZipfDistribution.ZipfRejectionInversionSampler;
 import org.apache.commons.math3.exception.NotStrictlyPositiveException;
 import org.apache.commons.math3.random.AbstractRandomGenerator;
 import org.apache.commons.math3.random.RandomGenerator;
@@ -73,7 +68,7 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
 
     /**
      * Creates the default probability density test expected values.
-     *  Reference values are from R, version 2.15.3 (VGAM package 0.9-0).
+     * Reference values are from R, version 2.15.3 (VGAM package 0.9-0).
      */
     @Override
     public double[] makeDensityTestValues() {
@@ -129,6 +124,7 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
         Assert.assertEquals(dist.getNumericalVariance(), 0.24264068711928521, tol);
     }
 
+
     /**
      * Test sampling for various number of points and exponents.
      */
@@ -140,9 +136,9 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
             2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100
         };
         double[] exponentValues = {
-            1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
+            1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 2e-1, 5e-1,
             1. - 1e-9, 1.0, 1. + 1e-9, 1.1, 1.2, 1.3, 1.5, 1.6, 1.7, 1.8, 2.0,
-            2.5, 3.0, 4., 5., 6., 7., 8., 9., 10., 20., 30.
+            2.5, 3.0, 4., 5., 6., 7., 8., 9., 10., 20., 30., 100., 150.
         };
 
         for (int numPoints : numPointsValues) {
@@ -172,110 +168,23 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
     }
 
     @Test
-    public void testSamplerIntegratePowerFunction() {
-        final double tol = 1e-6;
-        final double[] exponents = {
-            -1e-5, -1e-4, -1e-3, -1e-2, -1e-1, -1e0, -1e1
-        };
-        final double[] limits = {
-            0.5, 1., 1.5, 2., 2.5, 3., 3.5, 4., 4.5, 5., 5.5, 6.0, 6.5, 7.0,
-            7.5, 8.0, 8.5, 9.0, 9.5, 10.0
-        };
-
-        for (final double exponent : exponents) {
-            for (int lowerLimitIndex = 0; lowerLimitIndex < limits.length; ++lowerLimitIndex) {
-                final double lowerLimit = limits[lowerLimitIndex];
-                for (int upperLimitIndex = lowerLimitIndex+1; upperLimitIndex < limits.length; ++upperLimitIndex) {
-                    final double upperLimit = limits[upperLimitIndex];
-                    final double result1 = new SimpsonIntegrator().integrate(10000, new UnivariateFunction() {
-                        public double value(double x) {
-                            return Math.pow(x, exponent);
-                        }
-                    }, lowerLimit, upperLimit);
-
-                    final double result2 = ZipfRejectionSampler.integratePowerFunction(exponent, lowerLimit, upperLimit);
-
-                    assertEquals(result1, result2, (result1+result2)*tol);
-
-                }
-            }
-        }
-    }
-
-    @Test
-    public void testSamplerAcceptanceRate() {
+    public void testSamplerHelper1() {
         final double tol = 1e-12;
-        final double[] exponents = {
-            1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 2e0, 5e0, 1e1, 1e2, 1e3
-        };
-        final int[] values = {
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
+        final double[] testValues = {
+            Math.nextUp(-1.), -1e-1, -1e-2, -1e-3, -1e-4, -1e-5, -1e-6, -1e-7, -1e-8,
+            -1e-9, -1e-10, -1e-11, 0., 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6,
+            1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0
         };
-        final int numberOfElements = 1000;
-        for (final double exponent : exponents) {
-            ZipfRejectionSampler sampler = new ZipfRejectionSampler(numberOfElements, exponent);
-            for (final int value : values) {
-                double expected = FastMath.pow(value, -exponent);
-                double result = sampler.acceptanceRateForTailSample(value) *
-                                ZipfRejectionSampler.integratePowerFunction(-exponent, value - 0.5, value + 0.5);
-                TestUtils.assertRelativelyEquals(expected, result, tol);
-                assertTrue(result <=1.); // test Jensen's inequality
-            }
+        for (final double testValue : testValues) {
+            final double expected = FastMath.log1p(testValue);
+            TestUtils.assertRelativelyEquals(expected, ZipfRejectionInversionSampler.helper1(testValue)*testValue, tol);
         }
     }
 
-    @Test
-    public void testSamplerInverseInstrumentalDistribution() {
-        final double tol = 1e-14;
-        final double[] exponentValues = {
-            1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 2E0, 3e0, 4e0, 5e0, 6., 7., 8., 9., 10., 50.
-        };
-        final double[] qValues = {
-            0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0
-        };
-        final int[] numberOfElementsValues = {
-            2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 100
-        };
-
-        for (final double exponent : exponentValues) {
-            for (final int numberOfElements : numberOfElementsValues) {
-                final ZipfRejectionSampler sampler = new ZipfRejectionSampler(numberOfElements, exponent);
-                for (final double q : qValues) {
-                    int result = sampler.sampleFromInstrumentalDistributionTail(q);
-                    double total =
-                            ZipfRejectionSampler.integratePowerFunction(-exponent, 1.5, numberOfElements + 0.5);
-                    double lowerBound =
-                            ZipfRejectionSampler.integratePowerFunction(-exponent, 1.5, result - 0.5) / total;
-                    double upperBound =
-                            ZipfRejectionSampler.integratePowerFunction(-exponent, 1.5, result + 0.5) / total;
-                    assertTrue(lowerBound <= q*(1.+tol));
-                    assertTrue(upperBound >= q*(1.-tol));
-                }
-            }
-        }
-    }
 
     @Test
-    public void testSamplerHelper1() {
-        final double tol = 1e-14;
-        final double[] qValues = {
-            0., 1e-12, 1e-11, 1e-10, 1e-9, 9e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4,
-            1e-3, 1e-2, 1e-1, 1e0
-        };
-        final double[] xValues = {
-            -Double.MAX_VALUE, -1e10, -1e9, -1e8, -1e7, -1e6, -1e5, -1e4, -1e3,
-            -1e2, -1e1, -1e0, -1e-1, -1e-2, -1e-3, -1e-4, -1e-5, -1e-6, -1e-7,
-            -1e-8, -1e-9, -1e-10, -Double.MIN_VALUE, 0.0, Double.MIN_VALUE,
-            1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0,
-            1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, Double.MAX_VALUE
-        };
-
-        for (final double q : qValues) {
-            for(final double x : xValues) {
-                double calculated = ZipfRejectionSampler.helper1(q, x);
-                TestUtils.assertRelativelyEquals((1.-q)+q*Math.exp(x), FastMath.exp(calculated*x), tol);
-            }
-        }
+    public void testSamplerHelper1Minus1() {
+        Assert.assertEquals(Double.POSITIVE_INFINITY, ZipfRejectionInversionSampler.helper1(-1d), 0d);
     }
 
     @Test
@@ -288,7 +197,7 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
         };
         for (double testValue : testValues) {
             final double expected = FastMath.expm1(testValue);
-            TestUtils.assertRelativelyEquals(expected, ZipfRejectionSampler.helper2(testValue)*testValue, tol);
+            TestUtils.assertRelativelyEquals(expected, ZipfRejectionInversionSampler.helper2(testValue)*testValue, tol);
         }
     }