You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2021/09/21 16:32:49 UTC
[commons-statistics] 03/04: STATISTICS-34: Switch the Geometric PMF
based on the probability
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 5f44ae372410b1994933875bd1ea79b73464b0df
Author: aherbert <ah...@apache.org>
AuthorDate: Tue Sep 21 17:28:04 2021 +0100
STATISTICS-34: Switch the Geometric PMF based on the probability
Using the direct implementation of the PMF is more accurate when 1-p is
exact.
---
.../distribution/GeometricDistribution.java | 20 ++++++++++++++-
.../distribution/GeometricDistributionTest.java | 29 ++++++++++++++++++++++
2 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/GeometricDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/GeometricDistribution.java
index 0724715..a0c6aaa 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/GeometricDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/GeometricDistribution.java
@@ -16,6 +16,7 @@
*/
package org.apache.commons.statistics.distribution;
+import java.util.function.IntToDoubleFunction;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.distribution.GeometricSampler;
@@ -23,12 +24,17 @@ import org.apache.commons.rng.sampling.distribution.GeometricSampler;
* Implementation of the <a href="http://en.wikipedia.org/wiki/Geometric_distribution">geometric distribution</a>.
*/
public class GeometricDistribution extends AbstractDiscreteDistribution {
+ /** 1/2. */
+ private static final double HALF = 0.5;
+
/** The probability of success. */
private final double probabilityOfSuccess;
/** {@code log(p)} where p is the probability of success. */
private final double logProbabilityOfSuccess;
/** {@code log(1 - p)} where p is the probability of success. */
private final double log1mProbabilityOfSuccess;
+ /** Implementation of PMF(x). Assumes that {@code x > 0}. */
+ private final IntToDoubleFunction pmf;
/**
* Creates a geometric distribution.
@@ -44,6 +50,18 @@ public class GeometricDistribution extends AbstractDiscreteDistribution {
probabilityOfSuccess = p;
logProbabilityOfSuccess = Math.log(p);
log1mProbabilityOfSuccess = Math.log1p(-p);
+
+ // Choose the PMF implementation.
+ // When p >= 0.5 then 1 - p is exact and using the power function
+ // is consistently more accurate than the use of the exponential function.
+ // When p -> 0 then the exponential function avoids large error propagation
+ // of the power function used with an inexact 1 - p.
+ if (p >= HALF) {
+ final double q = 1 - p;
+ pmf = x -> Math.pow(q, x) * probabilityOfSuccess;
+ } else {
+ pmf = x -> Math.exp(log1mProbabilityOfSuccess * x) * probabilityOfSuccess;
+ }
}
/**
@@ -62,7 +80,7 @@ public class GeometricDistribution extends AbstractDiscreteDistribution {
// Special case of x=0 exploiting cancellation.
return x == 0 ? probabilityOfSuccess : 0.0;
}
- return Math.exp(log1mProbabilityOfSuccess * x) * probabilityOfSuccess;
+ return pmf.applyAsDouble(x);
}
/** {@inheritDoc} */
diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/GeometricDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/GeometricDistributionTest.java
index 9081408..1f41e00 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/GeometricDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/GeometricDistributionTest.java
@@ -187,6 +187,35 @@ class GeometricDistributionTest extends DiscreteDistributionAbstractTest {
}
/**
+ * Test the PMF is computed using the power function when p is above 0.5.
+ * <p>Note: The geometric distribution PMF is defined as:
+ * <pre>
+ * pmf(x) = (1-p)^x * p
+ * </pre>
+ * <p>As {@code p -> 0} use of the power function should be avoided as it will
+ * propagate the inexact computation of {@code 1 - p}. The implementation can
+ * switch to using a rearrangement with the exponential function which avoid
+ * computing {@code 1 - p}.
+ * <p>See STATISTICS-34.
+ *
+ * @param p Probability of success
+ */
+ @ParameterizedTest
+ @ValueSource(doubles = {0.5, 0.6658665, 0.75, 0.8125347, 0.9, 0.95, 0.99})
+ void testPMF(double p) {
+ final GeometricDistribution dist = new GeometricDistribution(p);
+ setDistribution(dist);
+
+ // The PMF should be an exact match to the direct implementation with Math.pow.
+ setTolerance(0);
+ final int[] x = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40};
+ final double[] values = Arrays.stream(x).mapToDouble(k -> p * Math.pow(1 - p, k)).toArray();
+ setProbabilityTestPoints(x);
+ setProbabilityTestValues(values);
+ verifyProbabilities();
+ }
+
+ /**
* Test the inverse CDF returns the correct x from the CDF result.
* This case was identified using various probabilities to discover a mismatch
* of x != icdf(cdf(x)). This occurs due to rounding errors on the inversion.