You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2022/11/24 16:40:15 UTC
[commons-rng] branch master updated: RNG-183: Update sampling of Pareto distribution with extreme shape
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git
The following commit(s) were added to refs/heads/master by this push:
new 25fa9cd6 RNG-183: Update sampling of Pareto distribution with extreme shape
25fa9cd6 is described below
commit 25fa9cd68ab1061c7f914543d69b5e2b3e98056f
Author: aherbert <ah...@apache.org>
AuthorDate: Thu Nov 24 16:40:08 2022 +0000
RNG-183: Update sampling of Pareto distribution with extreme shape
Large shape should sample from uniform p in [0, 1) to concentrate
samples towards the scale parameter.
Tiny shape should sample from uniform p in (0, 1] to concentrate samples
towards infinity.
---
.../rng/sampling/distribution/InternalUtils.java | 12 ++
.../InverseTransformParetoSampler.java | 15 ++-
.../distribution/ContinuousSamplersList.java | 4 +
.../sampling/distribution/InternalUtilsTest.java | 24 ++++
.../InverseTransformParetoSamplerTest.java | 130 ++++++++++++++++++++-
src/changes/changes.xml | 6 +
6 files changed, 184 insertions(+), 7 deletions(-)
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InternalUtils.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InternalUtils.java
index 64d3ddca..fa89e47f 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InternalUtils.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InternalUtils.java
@@ -124,6 +124,18 @@ final class InternalUtils { // Class is package-private on purpose; do not make
return (NormalizedGaussianSampler) newSampler;
}
+ /**
+ * Creates a {@code double} in the interval {@code [0, 1)} from a {@code long} value.
+ *
+ * @param v Number.
+ * @return a {@code double} value in the interval {@code [0, 1)}.
+ */
+ static double makeDouble(long v) {
+ // This matches the method in o.a.c.rng.core.util.NumberFactory.makeDouble(long)
+ // without adding an explicit dependency on that module.
+ return (v >>> 11) * DOUBLE_MULTIPLIER;
+ }
+
/**
* Creates a {@code double} in the interval {@code (0, 1]} from a {@code long} value.
*
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSampler.java
index b2e4aaf7..04acc293 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSampler.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSampler.java
@@ -16,12 +16,13 @@
*/
package org.apache.commons.rng.sampling.distribution;
+import java.util.function.LongToDoubleFunction;
import org.apache.commons.rng.UniformRandomProvider;
/**
* Sampling from a <a href="https://en.wikipedia.org/wiki/Pareto_distribution">Pareto distribution</a>.
*
- * <p>Sampling uses {@link UniformRandomProvider#nextDouble()}.</p>
+ * <p>Sampling uses {@link UniformRandomProvider#nextLong()}.</p>
*
* @since 1.0
*/
@@ -34,6 +35,8 @@ public class InverseTransformParetoSampler
private final double oneOverShape;
/** Underlying source of randomness. */
private final UniformRandomProvider rng;
+ /** Method to generate the (1 - p) value. */
+ private final LongToDoubleFunction nextDouble;
/**
* @param rng Generator of uniformly distributed random numbers.
@@ -54,6 +57,13 @@ public class InverseTransformParetoSampler
this.rng = rng;
this.scale = scale;
this.oneOverShape = 1 / shape;
+ // Generate (1 - p) so that samples are concentrated to the lower/upper bound:
+ // large shape samples from p in [0, 1) (lower bound)
+ // small shape samples from p in (0, 1] (upper bound)
+ // Note that the method used is logically reversed as it generates (1 - p).
+ nextDouble = shape >= 1 ?
+ InternalUtils::makeNonZeroDouble :
+ InternalUtils::makeDouble;
}
/**
@@ -66,12 +76,13 @@ public class InverseTransformParetoSampler
this.rng = rng;
scale = source.scale;
oneOverShape = source.oneOverShape;
+ nextDouble = source.nextDouble;
}
/** {@inheritDoc} */
@Override
public double sample() {
- return scale / Math.pow(rng.nextDouble(), oneOverShape);
+ return scale / Math.pow(nextDouble.applyAsDouble(rng.nextLong()), oneOverShape);
}
/** {@inheritDoc} */
diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
index c3e81fac..a9f9e6d0 100644
--- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
+++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
@@ -197,6 +197,10 @@ public final class ContinuousSamplersList {
// Pareto.
add(LIST, new org.apache.commons.math3.distribution.ParetoDistribution(unusedRng, scalePareto, shapePareto),
InverseTransformParetoSampler.of(RandomAssert.createRNG(), scalePareto, shapePareto));
+ // Large shape
+ final double shapePareto2 = 12.34;
+ add(LIST, new org.apache.commons.math3.distribution.ParetoDistribution(unusedRng, scalePareto, shapePareto2),
+ InverseTransformParetoSampler.of(RandomAssert.createRNG(), scalePareto, shapePareto2));
// Stable distributions.
// Gaussian case: alpha=2
diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InternalUtilsTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InternalUtilsTest.java
index 9951f6a3..f78c5362 100644
--- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InternalUtilsTest.java
+++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InternalUtilsTest.java
@@ -17,6 +17,8 @@
package org.apache.commons.rng.sampling.distribution;
import org.apache.commons.math3.special.Gamma;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.RandomAssert;
import org.apache.commons.rng.sampling.distribution.InternalUtils.FactorialLog;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -107,4 +109,26 @@ class InternalUtilsTest {
Assertions.assertThrows(NegativeArraySizeException.class,
() -> factorialLog.withCache(-1));
}
+
+ @Test
+ void testMakeDouble() {
+ final UniformRandomProvider rng1 = RandomAssert.seededRNG();
+ final UniformRandomProvider rng2 = RandomAssert.seededRNG();
+ for (int i = 0; i < 10; i++) {
+ // Assume the RNG is outputting in [0, 1) using the same method
+ Assertions.assertEquals(rng1.nextDouble(), InternalUtils.makeDouble(rng2.nextLong()));
+ }
+ }
+
+ @Test
+ void testMakeNonZeroDouble() {
+ final UniformRandomProvider rng1 = RandomAssert.seededRNG();
+ final UniformRandomProvider rng2 = RandomAssert.seededRNG();
+ final double u = 0x1.0p-53;
+ for (int i = 0; i < 10; i++) {
+ // Assume the RNG is outputting in [0, 1)
+ // The non-zero method should shift this to (0, 1]
+ Assertions.assertEquals(rng1.nextDouble() + u, InternalUtils.makeNonZeroDouble(rng2.nextLong()));
+ }
+ }
}
diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSamplerTest.java
index b1fa95a7..18b06a21 100644
--- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSamplerTest.java
+++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/InverseTransformParetoSamplerTest.java
@@ -20,11 +20,19 @@ import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.RandomAssert;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
/**
* Test for the {@link InverseTransformParetoSampler}. The tests hit edge cases for the sampler.
*/
class InverseTransformParetoSamplerTest {
+ /**
+ * The multiplier to convert the least significant 53-bits of a {@code long} to a {@code double}.
+ * This is the smallest non-zero value output from nextDouble.
+ */
+ private static final double U = 0x1.0p-53d;
+
/**
* Test the constructor with a bad scale.
*/
@@ -50,17 +58,129 @@ class InverseTransformParetoSamplerTest {
}
/**
- * Test the SharedStateSampler implementation.
+ * Test the SharedStateSampler implementation with a shape above and below 1.
*/
- @Test
- void testSharedStateSampler() {
+ @ParameterizedTest
+ @CsvSource({
+ "1.23, 4.56",
+ "1.23, 0.56",
+ })
+ void testSharedStateSampler(double scale, double shape) {
final UniformRandomProvider rng1 = RandomAssert.seededRNG();
final UniformRandomProvider rng2 = RandomAssert.seededRNG();
- final double scale = 1.23;
- final double shape = 4.56;
final SharedStateContinuousSampler sampler1 =
InverseTransformParetoSampler.of(rng1, scale, shape);
final SharedStateContinuousSampler sampler2 = sampler1.withUniformRandomProvider(rng2);
RandomAssert.assertProduceSameSequence(sampler1, sampler2);
}
+
+ /**
+ * Test a large shape is sampled using inversion of p in [0, 1).
+ */
+ @ParameterizedTest
+ @CsvSource({
+ "12.34, Infinity",
+ "1.23, Infinity",
+ "0.1, Infinity",
+ // Also valid when shape is finite.
+ // Limit taken from o.a.c.statistics.ParetoDistributionTest
+ "12.34, 6.6179136542552806e17",
+ "1.23, 6.6179136542552806e17",
+ "0.1, 6.6179136542552806e17",
+ })
+ void testLargeShape(double scale, double shape) {
+ // Assert the inversion using the power function.
+ // Note here the first argument to pow is (1 - p)
+ final double oneOverShape = 1 / shape;
+ // Note that inverse CDF(p=1) should be infinity (upper bound).
+ // However this requires a check on the value of p and the
+ // inversion returns the lower bound when 1 / shape = 0 due
+ // to pow(0, 0) == 1
+ final double p1expected = oneOverShape == 0 ? scale : Double.POSITIVE_INFINITY;
+ Assertions.assertEquals(p1expected, scale / Math.pow(0, oneOverShape));
+ Assertions.assertEquals(scale, scale / Math.pow(U, oneOverShape));
+ Assertions.assertEquals(scale, scale / Math.pow(1 - U, oneOverShape));
+ Assertions.assertEquals(scale, scale / Math.pow(1, oneOverShape));
+
+ // Sampling should be as if p in [0, 1) so avoiding an infinite sample for
+ // large finite shape when p=1
+ assertSampler(scale, shape, scale);
+ }
+
+ /**
+ * Test a tiny shape is sampled using inversion of p in (0, 1].
+ */
+ @ParameterizedTest
+ @CsvSource({
+ "12.34, 4.9e-324",
+ "1.23, 4.9e-324",
+ "0.1, 4.9e-324",
+ // Also valid when 1 / shape is finite.
+ // Limit taken from o.a.c.statistics.ParetoDistributionTest
+ "12.34, 7.456765604783329e-20",
+ "1.23, 7.456765604783329e-20",
+ "0.1, 7.456765604783329e-20",
+ })
+ void testTinyShape(double scale, double shape) {
+ // Assert the inversion using the power function.
+ // Note here the first argument to pow is (1 - p)
+ final double oneOverShape = 1 / shape;
+ Assertions.assertEquals(Double.POSITIVE_INFINITY, scale / Math.pow(0, oneOverShape));
+ Assertions.assertEquals(Double.POSITIVE_INFINITY, scale / Math.pow(U, oneOverShape));
+ Assertions.assertEquals(Double.POSITIVE_INFINITY, scale / Math.pow(1 - U, oneOverShape));
+ // Note that inverse CDF(p=0) should be scale (lower bound).
+ // However this requires a check on the value of p and the
+ // inversion returns NaN due to pow(1, inf) == NaN.
+ final double p0expected = oneOverShape == Double.POSITIVE_INFINITY ? Double.NaN : scale;
+ Assertions.assertEquals(p0expected, scale / Math.pow(1, oneOverShape));
+
+ // Sampling should be as if p in (0, 1] so avoiding a NaN sample for
+ // infinite 1 / shape when p=0
+ assertSampler(scale, shape, Double.POSITIVE_INFINITY);
+ }
+
+ /**
+ * Assert the sampler produces the expected sample value irrespective of the values from the RNG.
+ *
+ * @param scale Distribution scale
+ * @param shape Distribution shape
+ * @param expected Expected sample value
+ */
+ private static void assertSampler(double scale, double shape, double expected) {
+ // Extreme random numbers using no bits or all bits, then combinations
+ // that may be used to generate a double from the lower or upper 53-bits
+ final long[] values = {0, -1, 1, 1L << 11, -2, -2L << 11};
+ final UniformRandomProvider rng = createRNG(values);
+ ContinuousSampler s = InverseTransformParetoSampler.of(rng, scale, shape);
+ for (final long l : values) {
+ Assertions.assertEquals(expected, s.sample(), () -> "long bits = " + l);
+ }
+ // Any random number
+ s = InverseTransformParetoSampler.of(RandomAssert.createRNG(), scale, shape);
+ for (int i = 0; i < 100; i++) {
+ Assertions.assertEquals(expected, s.sample());
+ }
+ }
+
+ /**
+ * Creates the RNG to return the given values from the nextLong() method.
+ *
+ * @param values Long values
+ * @return the RNG
+ */
+ private static UniformRandomProvider createRNG(long... values) {
+ return new UniformRandomProvider() {
+ private int i;
+
+ @Override
+ public long nextLong() {
+ return values[i++];
+ }
+
+ @Override
+ public double nextDouble() {
+ throw new IllegalStateException("nextDouble cannot be trusted to be in [0, 1) and should be ignored");
+ }
+ };
+ }
}
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 1b23b557..9c79b2f9 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -56,6 +56,12 @@ If the output is not quite correct, check for invisible trailing spaces!
<release version="1.6" date="TBD" description="
New features, updates and bug fixes (requires Java 8).
">
+ <action dev="aherbert" type="update" issue="RNG-183">
+ "InverseTransformParetoSampler": Modified to concentrate samples at the distribution
+ lower/upper bounds for extreme shape parameters. Eliminates generation of outlier
+ infinite samples and NaN samples under certain conditions. Changes sampling to use
+ the RNG nextLong() method in-place of nextDouble().
+ </action>
</release>
<release version="1.5" date="2022-09-10" description="