You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2022/11/24 16:40:15 UTC

[commons-rng] branch master updated: RNG-183: Update sampling of Pareto distribution with extreme shape

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git


The following commit(s) were added to refs/heads/master by this push:
     new 25fa9cd6 RNG-183: Update sampling of Pareto distribution with extreme shape
25fa9cd6 is described below

commit 25fa9cd68ab1061c7f914543d69b5e2b3e98056f
Author: aherbert <ah...@apache.org>
AuthorDate: Thu Nov 24 16:40:08 2022 +0000

    RNG-183: Update sampling of Pareto distribution with extreme shape
    
    Large shape should sample from uniform p in [0, 1) to concentrate
    samples towards the scale parameter.
    
    Tiny shape should sample from uniform p in (0, 1] to concentrate samples
    towards infinity.
---
 .../rng/sampling/distribution/InternalUtils.java   |  12 ++
 .../InverseTransformParetoSampler.java             |  15 ++-
 .../distribution/ContinuousSamplersList.java       |   4 +
 .../sampling/distribution/InternalUtilsTest.java   |  24 ++++
 .../InverseTransformParetoSamplerTest.java         | 130 ++++++++++++++++++++-
 src/changes/changes.xml                            |   6 +
 6 files changed, 184 insertions(+), 7 deletions(-)

diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InternalUtils.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InternalUtils.java
index 64d3ddca..fa89e47f 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InternalUtils.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InternalUtils.java
@@ -124,6 +124,18 @@ final class InternalUtils { // Class is package-private on purpose; do not make
         return (NormalizedGaussianSampler) newSampler;
     }
 
+    /**
+     * Creates a {@code double} in the interval {@code [0, 1)} from a {@code long} value.
+     *
+     * @param v Number.
+     * @return a {@code double} value in the interval {@code [0, 1)}.
+     */
+    static double makeDouble(long v) {
+        // This matches the method in o.a.c.rng.core.util.NumberFactory.makeDouble(long)
+        // without adding an explicit dependency on that module.
+        return (v >>> 11) * DOUBLE_MULTIPLIER;
+    }
+
     /**
      * Creates a {@code double} in the interval {@code (0, 1]} from a {@code long} value.
      *
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSampler.java
index b2e4aaf7..04acc293 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSampler.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSampler.java
@@ -16,12 +16,13 @@
  */
 package org.apache.commons.rng.sampling.distribution;
 
+import java.util.function.LongToDoubleFunction;
 import org.apache.commons.rng.UniformRandomProvider;
 
 /**
  * Sampling from a <a href="https://en.wikipedia.org/wiki/Pareto_distribution">Pareto distribution</a>.
  *
- * <p>Sampling uses {@link UniformRandomProvider#nextDouble()}.</p>
+ * <p>Sampling uses {@link UniformRandomProvider#nextLong()}.</p>
  *
  * @since 1.0
  */
@@ -34,6 +35,8 @@ public class InverseTransformParetoSampler
     private final double oneOverShape;
     /** Underlying source of randomness. */
     private final UniformRandomProvider rng;
+    /** Method to generate the (1 - p) value. */
+    private final LongToDoubleFunction nextDouble;
 
     /**
      * @param rng Generator of uniformly distributed random numbers.
@@ -54,6 +57,13 @@ public class InverseTransformParetoSampler
         this.rng = rng;
         this.scale = scale;
         this.oneOverShape = 1 / shape;
+        // Generate (1 - p) so that samples are concentrated to the lower/upper bound:
+        // large shape samples from p in [0, 1)  (lower bound)
+        // small shape samples from p in (0, 1]  (upper bound)
+        // Note that the method used is logically reversed as it generates (1 - p).
+        nextDouble = shape >= 1 ?
+            InternalUtils::makeNonZeroDouble :
+            InternalUtils::makeDouble;
     }
 
     /**
@@ -66,12 +76,13 @@ public class InverseTransformParetoSampler
         this.rng = rng;
         scale = source.scale;
         oneOverShape = source.oneOverShape;
+        nextDouble = source.nextDouble;
     }
 
     /** {@inheritDoc} */
     @Override
     public double sample() {
-        return scale / Math.pow(rng.nextDouble(), oneOverShape);
+        return scale / Math.pow(nextDouble.applyAsDouble(rng.nextLong()), oneOverShape);
     }
 
     /** {@inheritDoc} */
diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
index c3e81fac..a9f9e6d0 100644
--- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
+++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
@@ -197,6 +197,10 @@ public final class ContinuousSamplersList {
             // Pareto.
             add(LIST, new org.apache.commons.math3.distribution.ParetoDistribution(unusedRng, scalePareto, shapePareto),
                 InverseTransformParetoSampler.of(RandomAssert.createRNG(), scalePareto, shapePareto));
+            // Large shape
+            final double shapePareto2 = 12.34;
+            add(LIST, new org.apache.commons.math3.distribution.ParetoDistribution(unusedRng, scalePareto, shapePareto2),
+                InverseTransformParetoSampler.of(RandomAssert.createRNG(), scalePareto, shapePareto2));
 
             // Stable distributions.
             // Gaussian case: alpha=2
diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InternalUtilsTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InternalUtilsTest.java
index 9951f6a3..f78c5362 100644
--- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InternalUtilsTest.java
+++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InternalUtilsTest.java
@@ -17,6 +17,8 @@
 package org.apache.commons.rng.sampling.distribution;
 
 import org.apache.commons.math3.special.Gamma;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.RandomAssert;
 import org.apache.commons.rng.sampling.distribution.InternalUtils.FactorialLog;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
@@ -107,4 +109,26 @@ class InternalUtilsTest {
         Assertions.assertThrows(NegativeArraySizeException.class,
             () -> factorialLog.withCache(-1));
     }
+
+    @Test
+    void testMakeDouble() {
+        final UniformRandomProvider rng1 = RandomAssert.seededRNG();
+        final UniformRandomProvider rng2 = RandomAssert.seededRNG();
+        for (int i = 0; i < 10; i++) {
+            // Assume the RNG is outputting in [0, 1) using the same method
+            Assertions.assertEquals(rng1.nextDouble(), InternalUtils.makeDouble(rng2.nextLong()));
+        }
+    }
+
+    @Test
+    void testMakeNonZeroDouble() {
+        final UniformRandomProvider rng1 = RandomAssert.seededRNG();
+        final UniformRandomProvider rng2 = RandomAssert.seededRNG();
+        final double u = 0x1.0p-53;
+        for (int i = 0; i < 10; i++) {
+            // Assume the RNG is outputting in [0, 1)
+            // The non-zero method should shift this to (0, 1]
+            Assertions.assertEquals(rng1.nextDouble() + u, InternalUtils.makeNonZeroDouble(rng2.nextLong()));
+        }
+    }
 }
diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSamplerTest.java
index b1fa95a7..18b06a21 100644
--- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSamplerTest.java
+++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSamplerTest.java
@@ -20,11 +20,19 @@ import org.apache.commons.rng.UniformRandomProvider;
 import org.apache.commons.rng.sampling.RandomAssert;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
 
 /**
  * Test for the {@link InverseTransformParetoSampler}. The tests hit edge cases for the sampler.
  */
 class InverseTransformParetoSamplerTest {
+    /**
+     * The multiplier to convert the least significant 53-bits of a {@code long} to a {@code double}.
+     * This is the smallest non-zero value output from nextDouble.
+     */
+    private static final double U = 0x1.0p-53d;
+
     /**
      * Test the constructor with a bad scale.
      */
@@ -50,17 +58,129 @@ class InverseTransformParetoSamplerTest {
     }
 
     /**
-     * Test the SharedStateSampler implementation.
+     * Test the SharedStateSampler implementation with a shape above and below 1.
      */
-    @Test
-    void testSharedStateSampler() {
+    @ParameterizedTest
+    @CsvSource({
+        "1.23, 4.56",
+        "1.23, 0.56",
+    })
+    void testSharedStateSampler(double scale, double shape) {
         final UniformRandomProvider rng1 = RandomAssert.seededRNG();
         final UniformRandomProvider rng2 = RandomAssert.seededRNG();
-        final double scale = 1.23;
-        final double shape = 4.56;
         final SharedStateContinuousSampler sampler1 =
             InverseTransformParetoSampler.of(rng1, scale, shape);
         final SharedStateContinuousSampler sampler2 = sampler1.withUniformRandomProvider(rng2);
         RandomAssert.assertProduceSameSequence(sampler1, sampler2);
     }
+
+    /**
+     * Test a large shape is sampled using inversion of p in [0, 1).
+     */
+    @ParameterizedTest
+    @CsvSource({
+        "12.34, Infinity",
+        "1.23, Infinity",
+        "0.1, Infinity",
+        // Also valid when shape is finite.
+        // Limit taken from o.a.c.statistics.ParetoDistributionTest
+        "12.34, 6.6179136542552806e17",
+        "1.23, 6.6179136542552806e17",
+        "0.1, 6.6179136542552806e17",
+    })
+    void testLargeShape(double scale, double shape) {
+        // Assert the inversion using the power function.
+        // Note here the first argument to pow is (1 - p)
+        final double oneOverShape = 1 / shape;
+        // Note that inverse CDF(p=1) should be infinity (upper bound).
+        // However this requires a check on the value of p and the
+        // inversion returns the lower bound when 1 / shape = 0 due
+        // to pow(0, 0) == 1
+        final double p1expected = oneOverShape == 0 ? scale : Double.POSITIVE_INFINITY;
+        Assertions.assertEquals(p1expected, scale / Math.pow(0, oneOverShape));
+        Assertions.assertEquals(scale, scale / Math.pow(U, oneOverShape));
+        Assertions.assertEquals(scale, scale / Math.pow(1 - U, oneOverShape));
+        Assertions.assertEquals(scale, scale / Math.pow(1, oneOverShape));
+
+        // Sampling should be as if p in [0, 1) so avoiding an infinite sample for
+        // large finite shape when p=1
+        assertSampler(scale, shape, scale);
+    }
+
+    /**
+     * Test a tiny shape is sampled using inversion of p in (0, 1].
+     */
+    @ParameterizedTest
+    @CsvSource({
+        "12.34, 4.9e-324",
+        "1.23, 4.9e-324",
+        "0.1, 4.9e-324",
+        // Also valid when 1 / shape is finite.
+        // Limit taken from o.a.c.statistics.ParetoDistributionTest
+        "12.34, 7.456765604783329e-20",
+        "1.23, 7.456765604783329e-20",
+        "0.1, 7.456765604783329e-20",
+    })
+    void testTinyShape(double scale, double shape) {
+        // Assert the inversion using the power function.
+        // Note here the first argument to pow is (1 - p)
+        final double oneOverShape = 1 / shape;
+        Assertions.assertEquals(Double.POSITIVE_INFINITY, scale / Math.pow(0, oneOverShape));
+        Assertions.assertEquals(Double.POSITIVE_INFINITY, scale / Math.pow(U, oneOverShape));
+        Assertions.assertEquals(Double.POSITIVE_INFINITY, scale / Math.pow(1 - U, oneOverShape));
+        // Note that inverse CDF(p=0) should be scale (lower bound).
+        // However this requires a check on the value of p and the
+        // inversion returns NaN due to pow(1, inf) == NaN.
+        final double p0expected = oneOverShape == Double.POSITIVE_INFINITY ? Double.NaN : scale;
+        Assertions.assertEquals(p0expected, scale / Math.pow(1, oneOverShape));
+
+        // Sampling should be as if p in (0, 1] so avoiding a NaN sample for
+        // infinite 1 / shape when p=0
+        assertSampler(scale, shape, Double.POSITIVE_INFINITY);
+    }
+
+    /**
+     * Assert the sampler produces the expected sample value irrespective of the values from the RNG.
+     *
+     * @param scale Distribution scale
+     * @param shape Distribution shape
+     * @param expected Expected sample value
+     */
+    private static void assertSampler(double scale, double shape, double expected) {
+        // Extreme random numbers using no bits or all bits, then combinations
+        // that may be used to generate a double from the lower or upper 53-bits
+        final long[] values = {0, -1, 1, 1L << 11, -2, -2L << 11};
+        final UniformRandomProvider rng = createRNG(values);
+        ContinuousSampler s = InverseTransformParetoSampler.of(rng, scale, shape);
+        for (final long l : values) {
+            Assertions.assertEquals(expected, s.sample(), () -> "long bits = " + l);
+        }
+        // Any random number
+        s = InverseTransformParetoSampler.of(RandomAssert.createRNG(), scale, shape);
+        for (int i = 0; i < 100; i++) {
+            Assertions.assertEquals(expected, s.sample());
+        }
+    }
+
+    /**
+     * Creates the RNG to return the given values from the nextLong() method.
+     *
+     * @param values Long values
+     * @return the RNG
+     */
+    private static UniformRandomProvider createRNG(long... values) {
+        return new UniformRandomProvider() {
+            private int i;
+
+            @Override
+            public long nextLong() {
+                return values[i++];
+            }
+
+            @Override
+            public double nextDouble() {
+                throw new IllegalStateException("nextDouble cannot be trusted to be in [0, 1) and should be ignored");
+            }
+        };
+    }
 }
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 1b23b557..9c79b2f9 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -56,6 +56,12 @@ If the output is not quite correct, check for invisible trailing spaces!
     <release version="1.6" date="TBD" description="
 New features, updates and bug fixes (requires Java 8).
 ">
+      <action dev="aherbert" type="update" issue="RNG-183">
+        "InverseTransformParetoSampler": Modified to concentrate samples at the distribution
+        lower/upper bounds for extreme shape parameters. Eliminates generation of outlier
+        infinite samples and NaN samples under certain conditions. Changes sampling to use
+        the RNG nextLong() method in-place of nextDouble().
+      </action>
 </release>
 
     <release version="1.5" date="2022-09-10" description="