You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2022/01/26 20:04:15 UTC

[commons-statistics] branch master updated (09cd116 -> c636dab)

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git.


    from 09cd116  Update F distribution with the beta function
     new ae1bb0b  Test file formatting
     new 32776c5  Updated PMF vs log PMF assertion at the support bound
     new eef97dd  Fix typo
     new 5d984f2  Reuse temp n result
     new 5fb858a  Add special cases for binomial distribution PMF
     new c636dab  Add sample size = 0 test case

The 6 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../distribution/BinomialDistribution.java         |  37 +++-
 .../distribution/PascalDistribution.java           |   4 +-
 .../distribution/SaddlePointExpansionUtils.java    |   8 +-
 .../distribution/BaseDiscreteDistributionTest.java |  34 +++-
 .../distribution/BinomialDistributionTest.java     | 214 +++++++++++++++++++++
 .../distribution/test.binomial.1.properties        |  25 ++-
 .../distribution/test.binomial.2.properties        |   3 +-
 .../distribution/test.binomial.3.properties        |   3 +-
 .../distribution/test.binomial.4.properties        |  13 +-
 .../distribution/test.binomial.6.properties        |   1 +
 .../distribution/test.binomial.7.properties        |   1 +
 ...properties => test.hypergeometric.5.properties} |   4 +-
 12 files changed, 325 insertions(+), 22 deletions(-)
 copy commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/{test.hypergeometric.3.properties => test.hypergeometric.5.properties} (91%)

[commons-statistics] 05/06: Add special cases for binomial distribution PMF

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit 5fb858a5d1723faa48211378aef8639101975a20
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Wed Jan 26 19:41:07 2022 +0000

    Add special cases for binomial distribution PMF
    
    When x=0 or x=n the power function can be used to evaluate the PMF to 1
    ULP. This is more accurate than exp(logPMF) when the expected result is
    very small.
    
    Increased precision of reference test values.
    
    Added specific bounds test using exact pmf computation.
    
    Updated test tolerance from default 1e-12 to 1e-15.
    
    Added special case tests for p=0, p=1, n=0, n=1.
    
    Updated code to handle p=-0.0.
---
 .../distribution/BinomialDistribution.java         |  37 +++-
 .../distribution/SaddlePointExpansionUtils.java    |   6 +-
 .../distribution/BinomialDistributionTest.java     | 214 +++++++++++++++++++++
 .../distribution/test.binomial.1.properties        |  25 ++-
 .../distribution/test.binomial.2.properties        |   3 +-
 .../distribution/test.binomial.3.properties        |   3 +-
 .../distribution/test.binomial.4.properties        |  13 +-
 7 files changed, 289 insertions(+), 12 deletions(-)

diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/BinomialDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/BinomialDistribution.java
index 5b5a1c0..e12cb68 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/BinomialDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/BinomialDistribution.java
@@ -37,10 +37,17 @@ import org.apache.commons.numbers.gamma.RegularizedBeta;
  * @see <a href="https://mathworld.wolfram.com/BinomialDistribution.html">Binomial distribution (MathWorld)</a>
  */
 public final class BinomialDistribution extends AbstractDiscreteDistribution {
+    /** 1/2. */
+    private static final float HALF = 0.5f;
+
     /** The number of trials. */
     private final int numberOfTrials;
     /** The probability of success. */
     private final double probabilityOfSuccess;
+    /** Cached value for pmf(x=0). */
+    private final double pmf0;
+    /** Cached value for pmf(x=n). */
+    private final double pmfn;
 
     /**
      * @param trials Number of trials.
@@ -50,6 +57,16 @@ public final class BinomialDistribution extends AbstractDiscreteDistribution {
                                  double p) {
         probabilityOfSuccess = p;
         numberOfTrials = trials;
+        // Special pmf cases where the power function is more accurate:
+        //   (n choose k) == 1 for k=0, k=n
+        //   pmf = p^k (1-p)^(n-k)
+        // Note: This handles the edge case of n=0: pmf(k=0) = 1, else 0
+        if (probabilityOfSuccess >= HALF) {
+            pmf0 = Math.pow(1 - probabilityOfSuccess, numberOfTrials);
+        } else {
+            pmf0 = Math.exp(numberOfTrials * Math.log1p(-probabilityOfSuccess));
+        }
+        pmfn = Math.pow(probabilityOfSuccess, numberOfTrials);
     }
 
     /**
@@ -68,7 +85,8 @@ public final class BinomialDistribution extends AbstractDiscreteDistribution {
                                             trials);
         }
         ArgumentUtils.checkProbability(p);
-        return new BinomialDistribution(trials, p);
+        // Avoid p = -0.0 to avoid returning -0.0 for some probability computations.
+        return new BinomialDistribution(trials, Math.abs(p));
     }
 
     /**
@@ -92,7 +110,16 @@ public final class BinomialDistribution extends AbstractDiscreteDistribution {
     /** {@inheritDoc} */
     @Override
     public double probability(int x) {
-        return Math.exp(logProbability(x));
+        if (x < 0 || x > numberOfTrials) {
+            return 0;
+        } else if (x == 0) {
+            return pmf0;
+        } else if (x == numberOfTrials) {
+            return pmfn;
+        }
+        return Math.exp(SaddlePointExpansionUtils.logBinomialProbability(x,
+                        numberOfTrials, probabilityOfSuccess,
+                        1.0 - probabilityOfSuccess));
     }
 
     /** {@inheritDoc} **/
@@ -103,6 +130,8 @@ public final class BinomialDistribution extends AbstractDiscreteDistribution {
         } else if (x < 0 || x > numberOfTrials) {
             return Double.NEGATIVE_INFINITY;
         }
+        // Special cases for x=0, x=n
+        // are handled in the saddle point expansion
         return SaddlePointExpansionUtils.logBinomialProbability(x,
                 numberOfTrials, probabilityOfSuccess,
                 1.0 - probabilityOfSuccess);
@@ -115,6 +144,8 @@ public final class BinomialDistribution extends AbstractDiscreteDistribution {
             return 0.0;
         } else if (x >= numberOfTrials) {
             return 1.0;
+        } else if (x == 0) {
+            return pmf0;
         }
         return RegularizedBeta.complement(probabilityOfSuccess,
                                           x + 1.0, (double) numberOfTrials - x);
@@ -127,6 +158,8 @@ public final class BinomialDistribution extends AbstractDiscreteDistribution {
             return 1.0;
         } else if (x >= numberOfTrials) {
             return 0.0;
+        } else if (x == numberOfTrials - 1) {
+            return pmfn;
         }
         return RegularizedBeta.value(probabilityOfSuccess,
                                      x + 1.0, (double) numberOfTrials - x);
diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/SaddlePointExpansionUtils.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/SaddlePointExpansionUtils.java
index c465f42..31ff62d 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/SaddlePointExpansionUtils.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/SaddlePointExpansionUtils.java
@@ -155,14 +155,16 @@ final class SaddlePointExpansionUtils {
     static double logBinomialProbability(int x, int n, double p, double q) {
         if (x == 0) {
             if (p < ONE_TENTH) {
-                return -getDeviancePart(n, n * q) - n * p;
+                // Subtract from 0 avoids returning -0.0 for p=0.0
+                return 0.0 - getDeviancePart(n, n * q) - n * p;
             } else if (n == 0) {
                 return 0;
             }
             return n * Math.log(q);
         } else if (x == n) {
             if (q < ONE_TENTH) {
-                return -getDeviancePart(n, n * p) - n * q;
+                // Subtract from 0 avoids returning -0.0 for p=1.0
+                return 0.0 - getDeviancePart(n, n * p) - n * q;
             }
             return n * Math.log(p);
         }
diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BinomialDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BinomialDistributionTest.java
index 261042b..8a46506 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BinomialDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BinomialDistributionTest.java
@@ -16,8 +16,13 @@
  */
 package org.apache.commons.statistics.distribution;
 
+import java.math.BigDecimal;
+import java.math.BigInteger;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.ValueSource;
 
 /**
  * Test cases for {@link BinomialDistribution}.
@@ -46,6 +51,11 @@ class BinomialDistributionTest extends BaseDiscreteDistributionTest {
         return new String[] {"NumberOfTrials", "ProbabilityOfSuccess"};
     }
 
+    @Override
+    protected double getRelativeTolerance() {
+        return 1e-15;
+    }
+
     //-------------------- Additional test cases -------------------------------
 
     @Test
@@ -60,4 +70,208 @@ class BinomialDistributionTest extends BaseDiscreteDistributionTest {
             Assertions.assertEquals(trials / 2, p);
         }
     }
+
+    /**
+     * Test special case of probability of success 0.0.
+     */
+    @ParameterizedTest
+    @ValueSource(ints = {0, 1, 2, 3, 10})
+    void testProbabilityOfSuccess0(int n) {
+        // The sign of p should not matter.
+        // Exact equality checks no -0.0 values are generated.
+        for (final double p : new double[] {-0.0, 0.0}) {
+            final BinomialDistribution dist = BinomialDistribution.of(n, p);
+            for (int k = -1; k <= n + 1; k++) {
+                Assertions.assertEquals(k == 0 ? 1.0 : 0.0, dist.probability(k));
+                Assertions.assertEquals(k == 0 ? 0.0 : Double.NEGATIVE_INFINITY, dist.logProbability(k));
+                Assertions.assertEquals(k >= 0 ? 1.0 : 0.0, dist.cumulativeProbability(k));
+                Assertions.assertEquals(k >= 0 ? 0.0 : 1.0, dist.survivalProbability(k));
+            }
+        }
+    }
+
+    /**
+     * Test special case of probability of success 1.0.
+     */
+    @ParameterizedTest
+    @ValueSource(ints = {0, 1, 2, 3, 10})
+    void testProbabilityOfSuccess1(int n) {
+        final BinomialDistribution dist = BinomialDistribution.of(n, 1);
+        // Exact equality checks no -0.0 values are generated.
+        for (int k = -1; k <= n + 1; k++) {
+            Assertions.assertEquals(k == n ? 1.0 : 0.0, dist.probability(k));
+            Assertions.assertEquals(k == n ? 0.0 : Double.NEGATIVE_INFINITY, dist.logProbability(k));
+            Assertions.assertEquals(k >= n ? 1.0 : 0.0, dist.cumulativeProbability(k));
+            Assertions.assertEquals(k >= n ? 0.0 : 1.0, dist.survivalProbability(k));
+        }
+    }
+
+    @ParameterizedTest
+    @ValueSource(doubles = {0, 1, 0.01, 0.99, 1e-17, 0.3645257e-8, 0.123415276368128, 0.67834532657232434})
+    void testNumberOfTrials0(double p) {
+        final BinomialDistribution dist = BinomialDistribution.of(0, p);
+        // Edge case where the probability is ignored when computing the result
+        for (int k = -1; k <= 2; k++) {
+            Assertions.assertEquals(k == 0 ? 1.0 : 0.0, dist.probability(k));
+            Assertions.assertEquals(k == 0 ? 0.0 : Double.NEGATIVE_INFINITY, dist.logProbability(k));
+            Assertions.assertEquals(k >= 0 ? 1.0 : 0.0, dist.cumulativeProbability(k));
+            Assertions.assertEquals(k >= 0 ? 0.0 : 1.0, dist.survivalProbability(k));
+        }
+    }
+
+    @ParameterizedTest
+    @ValueSource(doubles = {0, 1, 0.01, 0.99, 1e-17, 0.3645257e-8, 0.123415276368128, 0.67834532657232434})
+    void testNumberOfTrials1(double p) {
+        final BinomialDistribution dist = BinomialDistribution.of(1, p);
+        // Edge case where the probability should be exact
+        Assertions.assertEquals(0.0, dist.probability(-1));
+        Assertions.assertEquals(1 - p, dist.probability(0));
+        Assertions.assertEquals(p, dist.probability(1));
+        Assertions.assertEquals(0.0, dist.probability(2));
+        Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logProbability(-1));
+        // Current implementation does not use log1p so allow an error tolerance
+        TestUtils.assertEquals(Math.log1p(-p), dist.logProbability(0), DoubleTolerances.ulps(1));
+        Assertions.assertEquals(Math.log(p), dist.logProbability(1));
+        Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logProbability(2));
+        Assertions.assertEquals(0.0, dist.cumulativeProbability(-1));
+        Assertions.assertEquals(1 - p, dist.cumulativeProbability(0));
+        Assertions.assertEquals(1.0, dist.cumulativeProbability(1));
+        Assertions.assertEquals(1.0, dist.cumulativeProbability(2));
+        Assertions.assertEquals(1.0, dist.survivalProbability(-1));
+        Assertions.assertEquals(p, dist.survivalProbability(0));
+        Assertions.assertEquals(0.0, dist.survivalProbability(1));
+        Assertions.assertEquals(0.0, dist.survivalProbability(2));
+    }
+
+    /**
+     * Special case for x=0.
+     * This hits cases where the SaddlePointExpansionUtils are not used for
+     * probability functions. It ensures the edge case handling in BinomialDistribution
+     * matches the original logic in the saddle point expansion. This x=0 logic is used
+     * by the related hypergeometric distribution and covered by test cases for that
+     * distribution ensuring it is correct.
+     */
+    @ParameterizedTest
+    @ValueSource(doubles = {0, 1, 0.01, 0.99, 1e-17, 0.3645257e-8, 0.123415276368128, 0.67834532657232434})
+    void testX0(double p) {
+        for (final int n : new int[] {0, 1, 10}) {
+            final BinomialDistribution dist = BinomialDistribution.of(n, p);
+            final double expected = SaddlePointExpansionUtils.logBinomialProbability(0, n, p, 1 - p);
+            Assertions.assertEquals(expected, dist.logProbability(0));
+        }
+    }
+
+    /**
+     * Test the probability functions at the lower and upper bounds when the p-values
+     * are very small. The expected results should be within 1 ULP of an exact
+     * computation for pmf(x=0) and pmf(x=n). These values can be computed using
+     * java.lang.Math functions.
+     *
+     * <p>The next value, e.g. pmf(x=1) and cdf(x=1), is asserted to the specified
+     * relative error tolerance. These values require computations using p and 1-p
+     * and are less exact.
+     *
+     * @param n Number of trials
+     * @param p Probability of success
+     * @param eps1 Relative error tolerance for pmf(x=1)
+     * @param epsn1 Relative error tolerance for pmf(x=n-1)
+     */
+    @ParameterizedTest
+    @CsvSource({
+        // Min p-value is shown for reference.
+        "100, 0.50, 8e-15, 8e-15", // 7.888609052210118E-31
+        "100, 0.01, 1e-15, 3e-14", // 1.000000000000002E-200
+        "100, 0.99, 4e-15, 1e-15", // 1.0000000000000887E-200
+        "140, 0.01, 1e-15, 2e-13", // 1.0000000000000029E-280
+        "140, 0.99, 2e-13, 1e-15", // 1.0000000000001244E-280
+        "50, 0.001, 1e-15, 5e-14", // 1.0000000000000011E-150
+        "50, 0.999, 3e-14, 1e-15", // 1.0000000000000444E-150
+    })
+    void testBounds(int n, double p, double eps1, double epsn1) {
+        final BinomialDistribution dist = BinomialDistribution.of(n, p);
+        final BigDecimal prob0 = binomialProbability(n, p, 0);
+        final BigDecimal probn = binomialProbability(n, p, n);
+        final double p0 = prob0.doubleValue();
+        final double pn = probn.doubleValue();
+
+        // Require very small non-zero probabilities to make the test difficult.
+        // Check using 2^-53 so that at least one p-value satisfies 1 - p == 1.
+        final double minp = Math.min(p0, pn);
+        Assertions.assertTrue(minp < 0x1.0p-53, () -> "Test should target small p-values: " + minp);
+        Assertions.assertTrue(minp > Double.MIN_NORMAL, () -> "Minimum P-value should not be sub-normal: " + minp);
+
+        // Almost exact at the bounds
+        final DoubleTolerance tol1 = DoubleTolerances.ulps(1);
+        TestUtils.assertEquals(p0, dist.probability(0), tol1, "pmf(0)");
+        TestUtils.assertEquals(pn, dist.probability(n), tol1, "pmf(n)");
+        // Consistent at the bounds
+        Assertions.assertEquals(dist.probability(0), dist.cumulativeProbability(0), "pmf(0) != cdf(0)");
+        Assertions.assertEquals(dist.probability(n), dist.survivalProbability(n - 1), "pmf(n) != sf(n-1)");
+
+        // Test probability and log probability are consistent.
+        // Avoid log when p-value is close to 1.
+        if (p0 < 0.9) {
+            TestUtils.assertEquals(Math.log(p0), dist.logProbability(0), tol1, "log(pmf(0)) != logpmf(0)");
+        } else {
+            TestUtils.assertEquals(p0, Math.exp(dist.logProbability(0)), tol1, "pmf(0) != exp(logpmf(0))");
+        }
+        if (pn < 0.9) {
+            TestUtils.assertEquals(Math.log(pn), dist.logProbability(n), tol1, "log(pmf(n)) != logpmf(n)");
+        } else {
+            TestUtils.assertEquals(pn, Math.exp(dist.logProbability(n)), tol1, "pmf(n) != exp(logpmf(n))");
+        }
+
+        // The next probability is accurate to the specified tolerance.
+        final BigDecimal prob1 = binomialProbability(n, p, 1);
+        final BigDecimal probn1 = binomialProbability(n, p, n - 1);
+        TestUtils.assertEquals(prob1.doubleValue(), dist.probability(1), DoubleTolerances.relative(eps1), "pmf(1)");
+        TestUtils.assertEquals(probn1.doubleValue(), dist.probability(n - 1), DoubleTolerances.relative(epsn1), "pmf(n-1)");
+
+        // Check the cumulative functions
+        final double cdf1 = prob0.add(prob1).doubleValue();
+        final double sfn2 = probn.add(probn1).doubleValue();
+        TestUtils.assertEquals(cdf1, dist.cumulativeProbability(1), DoubleTolerances.relative(eps1), "cmf(1)");
+        TestUtils.assertEquals(sfn2, dist.survivalProbability(n - 2), DoubleTolerances.relative(epsn1), "sf(n-2)");
+    }
+
+    /**
+     * Compute the binomial distribution probability mass function using exact
+     * arithmetic.
+     *
+     * <p>This has no error handling for invalid arguments.
+     *
+     * <p>Warning: BigDecimal has a limit on the size of the exponent for the power
+     * function. This method has not been extensively tested with very small
+     * p-values, large n or large k. Use of a MathContext to round intermediates may be
+     * required to reduce memory consumption. The binomial coefficient may not
+     * compute for large n and k ~ n/2.
+     *
+     * @param n Number of trials (must be positive)
+     * @param p Probability of success (in [0, 1])
+     * @param k Number of successes (must be positive)
+     * @return pmf(X=k)
+     */
+    private static BigDecimal binomialProbability(int n, double p, int k) {
+        final int nmk = n - k;
+        final int m = Math.min(k, nmk);
+        // Probability component: p^k * (1-p)^(n-k)
+        final BigDecimal bp = new BigDecimal(p);
+        final BigDecimal result = bp.pow(k).multiply(
+                 BigDecimal.ONE.subtract(bp).pow(nmk));
+        // Compute the binomial coefficient
+        // Simple edge cases first.
+        if (m == 0) {
+            return result;
+        } else if (m == 1) {
+            return result.multiply(new BigDecimal(n));
+        }
+        // See org.apache.commons.numbers.combinatorics.BinomialCoefficient
+        BigInteger nCk = BigInteger.ONE;
+        int i = n - m + 1;
+        for (int j = 1; j <= m; j++) {
+            nCk = nCk.multiply(BigInteger.valueOf(i)).divide(BigInteger.valueOf(j));
+            i++;
+        }
+        return new BigDecimal(nCk).multiply(result);
+    }
 }
diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.1.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.1.properties
index 5064881..81c639d 100644
--- a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.1.properties
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.1.properties
@@ -23,12 +23,27 @@ upper = 10
 cdf.points = -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
 # Reference values are from R, version 2.15.3.
 cdf.values = \
-  0d, 5.9049e-06, 0.0001436859, 0.0015903864, 0.0105920784,  0.0473489874,\
-  0.1502683326, 0.3503892816, 0.6172172136, 0.8506916541, 0.9717524751, 1d, 1d
+  0.0000000000000000000e+00 5.9049000000000118537e-06 \
+  1.4368590000000025682e-04 1.5903864000000027951e-03 \
+  1.0592078400000015312e-02 4.7348987400000042136e-02 \
+  1.5026833260000005410e-01 3.5038928160000010203e-01 \
+  6.1721721360000014744e-01 8.5069165410000002758e-01 \
+  9.7175247509999995721e-01 1.0000000000000000000e+00 \
+  1.0000000000000000000e+00
 pmf.values = \
-  0d, 0.0000059049d, 0.000137781d, 0.0014467005,\
-  0.009001692, 0.036756909, 0.1029193452, 0.200120949, 0.266827932,\
-  0.2334744405, 0.121060821, 0.0282475249, 0d
+  0.0000000000000000000e+00 5.9049000000000059245e-06 \
+  1.3778100000000039404e-04 1.4467005000000031888e-03 \
+  9.0016920000000021085e-03 3.6756909000000038967e-02 \
+  1.0291934520000002584e-01 2.0012094900000007569e-01 \
+  2.6682793199999993439e-01 2.3347444049999999116e-01 \
+  1.2106082099999986024e-01 2.8247524899999980341e-02 \
+  0.0000000000000000000e+00
+sf.values = \
+  1.00000000000000000000 0.99999409509999992451 0.99985631409999997654 \
+  0.99840961360000002323 0.98940792160000001765 0.95265101259999995786 \
+  0.84973166739999994590 0.64961071839999995348 0.38278278639999985256 \
+  0.14930834590000002793 0.02824752489999998728 0.00000000000000000000 \
+  0.00000000000000000000
 icdf.points = \
   0, 0.001, 0.010, 0.025, 0.050, 0.100,\
   0.999, 0.990, 0.975, 0.950, 0.900, 1
diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.2.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.2.properties
index dfa3c15..92bac80 100644
--- a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.2.properties
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.2.properties
@@ -25,7 +25,8 @@ upper = 100
 cdf.points = 100
 cdf.values = 1.0
 pmf.points = 0
-pmf.values = 1e-200
+# BigDecimal.ONE.subtract(new BigDecimal(0.99)).pow(100, MathContext.DECIMAL128)
+pmf.values = 1.000000000000088817841970016428095E-200
 # computed using R version 3.4.4
 cdf.hp.points = 82, 81
 cdf.hp.values = 1.4061271955993513664e-17, 6.1128083336354843707e-19
diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.3.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.3.properties
index 46b8d97..09cb32c 100644
--- a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.3.properties
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.3.properties
@@ -23,7 +23,8 @@ upper = 100
 # Set values for the required fields.
 cdf.points = 100
 cdf.values = 1.0
-pmf.values = 1e-200
+# new BigDecimal(0.01).pow(100, MathContext.DECIMAL128)
+pmf.values = 1.000000000000002081668171172170658E-200
 # computed using R version 3.4.4
 sf.hp.points = 18, 19
 sf.hp.values = 6.1128083336353977038e-19, 2.4944165604029235392e-20
diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.4.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.4.properties
index 1c4c3d6..e459342 100644
--- a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.4.properties
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.4.properties
@@ -14,7 +14,6 @@
 # limitations under the License.
 
 parameters = 10 0.3
-tolerance.relative = 1e-10
 # 10 * 0.3
 mean = 3
 # 10 * 0.3 * 0.7
@@ -33,6 +32,18 @@ pmf.values = \
   1.2106082099999991575e-01, 2.3347444049999999116e-01, 2.6682793199999993439e-01, 2.0012094900000007569e-01,\
   1.0291934520000002584e-01, 3.6756909000000004273e-02, 9.0016919999999864960e-03, 1.4467005000000008035e-03,\
   1.3778099999999990615e-04, 5.9048999999999949131e-06, 0.0000000000000000000e+00
+# sf(x=9) of 5.9048999999999915249e-06 replaced with exact computation
+# using BinomialDistributionTest.binomialProbability.
+# The values differ by 7 ulp.
+sf.values = \
+  1.0000000000000000000e+00 9.7175247509999995721e-01,\
+  8.5069165410000002758e-01 6.1721721360000025847e-01,\
+  3.5038928159999976897e-01 1.5026833259999988757e-01,\
+  4.7348987399999951931e-02 1.0592078399999996230e-02,\
+  1.5903864000000001930e-03 1.4368589999999998577e-04,\
+  0.000005904899999999997814748020630304745,\
+  0.0000000000000000000e+00,\
+  0.0000000000000000000e+00
 icdf.points = \
   0, 0.001, 0.010, 0.025, 0.050, 0.100,\
   0.999, 0.990, 0.975, 0.950, 0.900, 1

[commons-statistics] 03/06: Fix typo

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit eef97dd1cd94ce086e43d499012832164a89fa88
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Wed Jan 26 01:21:44 2022 +0000

    Fix typo
---
 .../commons/statistics/distribution/SaddlePointExpansionUtils.java      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/SaddlePointExpansionUtils.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/SaddlePointExpansionUtils.java
index 07f27cd..c465f42 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/SaddlePointExpansionUtils.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/SaddlePointExpansionUtils.java
@@ -87,7 +87,7 @@ final class SaddlePointExpansionUtils {
      * <p>Note: This function has been modified for integer {@code z}.</p>
      *
      * @param z Value at which the function is evaluated.
-     * @return the Striling's series error.
+     * @return the Stirling's series error.
      */
     static double getStirlingError(int z) {
         if (z <= STIRLING_ERROR_THRESHOLD) {

[commons-statistics] 06/06: Add sample size = 0 test case

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit c636dabf82c8763c1fe60fcb4afa2b8fe7045994
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Wed Jan 26 19:41:25 2022 +0000

    Add sample size = 0 test case
---
 .../distribution/test.hypergeometric.5.properties  | 28 ++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.hypergeometric.5.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.hypergeometric.5.properties
new file mode 100644
index 0000000..c8a24df
--- /dev/null
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.hypergeometric.5.properties
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Verify that if sampleSize = 0, mass is concentrated on 0.
+parameters = 5, 3, 0
+mean = 0
+variance = 0
+lower = 0
+upper = 0
+cdf.points = -1, 0, 1, 3, 10
+cdf.values = 0, 1, 1, 1, 1
+pmf.values = 0, 1, 0, 0, 0
+icdf.points = 0.1, 0.5
+icdf.values = 0, 0
+isf.points = 0.9, 0.5
+isf.values = 0, 0

[commons-statistics] 01/06: Test file formatting

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit ae1bb0b5d59db1cda6fde01e68f3dfdd42639ba4
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Tue Jan 25 21:31:04 2022 +0000

    Test file formatting
---
 .../apache/commons/statistics/distribution/test.binomial.6.properties    | 1 +
 .../apache/commons/statistics/distribution/test.binomial.7.properties    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.6.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.6.properties
index dde438c..a75af56 100644
--- a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.6.properties
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.6.properties
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 # Degenerate n=0 case
 parameters = 0 0.01
 mean = 0
diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.7.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.7.properties
index 078a956..14f11de 100644
--- a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.7.properties
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.7.properties
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 # Degenerate p=0 case
 parameters = 5 0.0
 mean = 0

[commons-statistics] 02/06: Updated PMF vs log PMF assertion at the support bound

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit 32776c58d64a031f051d1a608ca93873915e58e9
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Wed Jan 26 01:21:30 2022 +0000

    Updated PMF vs log PMF assertion at the support bound
---
 .../distribution/BaseDiscreteDistributionTest.java | 34 +++++++++++++++++++---
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDiscreteDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDiscreteDistributionTest.java
index a10e731..ccbe909 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDiscreteDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDiscreteDistributionTest.java
@@ -884,7 +884,7 @@ abstract class BaseDiscreteDistributionTest
             // the CDF is 1.0 as they may be truncated.
             Assertions.assertEquals(1.0, dist.cumulativeProbability(hi), "cdf(upper)");
             Assertions.assertEquals(0.0, dist.survivalProbability(hi), "sf(upper)");
-            TestUtils.assertEquals(dist.probability(hi), dist.survivalProbability(hi - 1), tolerance, "sf(upper - 1)");
+            TestUtils.assertEquals(dist.probability(hi), dist.survivalProbability(hi - 1), tolerance, () -> "pmf(upper - 1) != sf(upper - 1) for " + hi);
 
             final int above = hi + 1;
             Assertions.assertEquals(0.0, dist.probability(above), "pmf(x > upper)");
@@ -894,10 +894,36 @@ abstract class BaseDiscreteDistributionTest
         }
 
         // Test the logProbability at the support bound. This hits edge case coverage for logProbability.
+        assertPmfAndLogPmfAtBound(dist, lo, tolerance, "lower");
+        assertPmfAndLogPmfAtBound(dist, hi, tolerance, "upper");
+    }
+
+    /**
+     * Assert the PMF and log PMF are consistent at the named point.
+     * This method asserts either:
+     * <pre>
+     * log(pmf) == logpmf
+     * pmf      == exp(logpmf)
+     * </pre>
+     *
+     * @param dist Distribution
+     * @param x Point
+     * @param tolerance Test tolerance
+     * @param name Point name
+     */
+    private static void assertPmfAndLogPmfAtBound(DiscreteDistribution dist, int x,
+                                                  DoubleTolerance tolerance, String name) {
         // It is assumed the log probability may support a value when the plain probability will be zero.
-        // So do not test Math.log(dist.probability(x)) == dist.logProbability(x)
-        TestUtils.assertEquals(dist.probability(lo), Math.exp(dist.logProbability(lo)), tolerance, "pmf(lower) != exp(logpmf(lower))");
-        TestUtils.assertEquals(dist.probability(hi), Math.exp(dist.logProbability(hi)), tolerance, "pmf(upper) != exp(logpmf(upper))");
+        // Only assert Math.log(dist.probability(x)) == dist.logProbability(x) for normal values.
+        final double p = dist.probability(x);
+        final double logp = dist.logProbability(x);
+        if (p > Double.MIN_NORMAL) {
+            TestUtils.assertEquals(Math.log(p), logp, tolerance,
+                () -> String.format("%s: log(pmf(%d)) != logpmf(%d)", name, x, x));
+        } else {
+            TestUtils.assertEquals(p, Math.exp(logp), tolerance,
+                () -> String.format("%s: pmf(%d) != exp(logpmf(%d))", name, x, x));
+        }
     }
 
     /**

[commons-statistics] 04/06: Reuse temp n result

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit 5d984f2964fee91fa01aecec419ae1eb6a6e245b
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Wed Jan 26 10:00:05 2022 +0000

    Reuse temp n result
    
    Remove invalid code comment
---
 .../apache/commons/statistics/distribution/PascalDistribution.java    | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/PascalDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/PascalDistribution.java
index f550c3b..0d71a2c 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/PascalDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/PascalDistribution.java
@@ -155,8 +155,7 @@ public final class PascalDistribution extends AbstractDiscreteDistribution {
             // overflow
             return Double.NEGATIVE_INFINITY;
         }
-        return LogBinomialCoefficient.value(x +
-              numberOfSuccesses - 1, numberOfSuccesses - 1) +
+        return LogBinomialCoefficient.value(n, numberOfSuccesses - 1) +
               logProbabilityOfSuccessByNumOfSuccesses +
               log1mProbabilityOfSuccess * x;
     }
@@ -177,7 +176,6 @@ public final class PascalDistribution extends AbstractDiscreteDistribution {
         if (x < 0) {
             return 1.0;
         }
-        // Use a helper function to compute the complement of the cumulative probability
         return RegularizedBeta.complement(probabilityOfSuccess,
                                           numberOfSuccesses, x + 1.0);
     }