You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2021/09/21 16:32:49 UTC

[commons-statistics] 03/04: STATISTICS-34: Switch the Geometric PMF based on the probability

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit 5f44ae372410b1994933875bd1ea79b73464b0df
Author: aherbert <ah...@apache.org>
AuthorDate: Tue Sep 21 17:28:04 2021 +0100

    STATISTICS-34: Switch the Geometric PMF based on the probability
    
    Using the direct implementation of the PMF is more accurate when 1-p is
    exact.
---
 .../distribution/GeometricDistribution.java        | 20 ++++++++++++++-
 .../distribution/GeometricDistributionTest.java    | 29 ++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/GeometricDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/GeometricDistribution.java
index 0724715..a0c6aaa 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/GeometricDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/GeometricDistribution.java
@@ -16,6 +16,7 @@
  */
 package org.apache.commons.statistics.distribution;
 
+import java.util.function.IntToDoubleFunction;
 import org.apache.commons.rng.UniformRandomProvider;
 import org.apache.commons.rng.sampling.distribution.GeometricSampler;
 
@@ -23,12 +24,17 @@ import org.apache.commons.rng.sampling.distribution.GeometricSampler;
  * Implementation of the <a href="http://en.wikipedia.org/wiki/Geometric_distribution">geometric distribution</a>.
  */
 public class GeometricDistribution extends AbstractDiscreteDistribution {
+    /** 1/2. */
+    private static final double HALF = 0.5;
+
     /** The probability of success. */
     private final double probabilityOfSuccess;
     /** {@code log(p)} where p is the probability of success. */
     private final double logProbabilityOfSuccess;
     /** {@code log(1 - p)} where p is the probability of success. */
     private final double log1mProbabilityOfSuccess;
+    /** Implementation of PMF(x). Assumes that {@code x > 0}. */
+    private final IntToDoubleFunction pmf;
 
     /**
      * Creates a geometric distribution.
@@ -44,6 +50,18 @@ public class GeometricDistribution extends AbstractDiscreteDistribution {
         probabilityOfSuccess = p;
         logProbabilityOfSuccess = Math.log(p);
         log1mProbabilityOfSuccess = Math.log1p(-p);
+
+        // Choose the PMF implementation.
+        // When p >= 0.5 then 1 - p is exact and using the power function
+        // is consistently more accurate than the use of the exponential function.
+        // When p -> 0 then the exponential function avoids large error propagation
+        // of the power function used with an inexact 1 - p.
+        if (p >= HALF) {
+            final double q = 1 - p;
+            pmf = x -> Math.pow(q, x) * probabilityOfSuccess;
+        } else {
+            pmf = x -> Math.exp(log1mProbabilityOfSuccess * x) * probabilityOfSuccess;
+        }
     }
 
     /**
@@ -62,7 +80,7 @@ public class GeometricDistribution extends AbstractDiscreteDistribution {
             // Special case of x=0 exploiting cancellation.
             return x == 0 ? probabilityOfSuccess : 0.0;
         }
-        return Math.exp(log1mProbabilityOfSuccess * x) * probabilityOfSuccess;
+        return pmf.applyAsDouble(x);
     }
 
     /** {@inheritDoc} */
diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/GeometricDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/GeometricDistributionTest.java
index 9081408..1f41e00 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/GeometricDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/GeometricDistributionTest.java
@@ -187,6 +187,35 @@ class GeometricDistributionTest extends DiscreteDistributionAbstractTest {
     }
 
     /**
+     * Test the PMF is computed using the power function when p is above 0.5.
+     * <p>Note: The geometric distribution PMF is defined as:
+     * <pre>
+     *   pmf(x) = (1-p)^x * p
+     * </pre>
+     * <p>As {@code p -> 0} use of the power function should be avoided as it will
+     * propagate the inexact computation of {@code 1 - p}. The implementation can
+     * switch to using a rearrangement with the exponential function which avoid
+     * computing {@code 1 - p}.
+     * <p>See STATISTICS-34.
+     *
+     * @param p Probability of success
+     */
+    @ParameterizedTest
+    @ValueSource(doubles = {0.5, 0.6658665, 0.75, 0.8125347, 0.9, 0.95, 0.99})
+    void testPMF(double p) {
+        final GeometricDistribution dist = new GeometricDistribution(p);
+        setDistribution(dist);
+
+        // The PMF should be an exact match to the direct implementation with Math.pow.
+        setTolerance(0);
+        final int[] x = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40};
+        final double[] values = Arrays.stream(x).mapToDouble(k -> p * Math.pow(1 - p, k)).toArray();
+        setProbabilityTestPoints(x);
+        setProbabilityTestValues(values);
+        verifyProbabilities();
+    }
+
+    /**
      * Test the inverse CDF returns the correct x from the CDF result.
      * This case was identified using various probabilities to discover a mismatch
      * of x != icdf(cdf(x)). This occurs due to rounding errors on the inversion.