You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2021/11/22 08:49:25 UTC
[commons-statistics] 01/04: STATISTICS-47: Add inverse survival probability
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 71d8081e71c6a927488ed65541be2b17f0259e15
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Thu Nov 18 00:43:22 2021 +0000
STATISTICS-47: Add inverse survival probability
Add default methods to the distribution interface.
Add default implementations to the abstract distribution classes.
Add tests for the default implementations.
---
.../AbstractContinuousDistribution.java | 138 ++++++++++++++----
.../distribution/AbstractDiscreteDistribution.java | 128 ++++++++++++-----
.../distribution/ContinuousDistribution.java | 25 +++-
.../distribution/DiscreteDistribution.java | 31 +++-
.../AbstractContinuousDistributionTest.java | 158 +++++++++++++++++++--
.../AbstractDiscreteDistributionTest.java | 147 +++++++++++++++----
.../distribution/ContinuousDistributionTest.java | 8 +-
.../distribution/DiscreteDistributionTest.java | 11 ++
src/main/resources/pmd/pmd-ruleset.xml | 7 +
9 files changed, 542 insertions(+), 111 deletions(-)
diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractContinuousDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractContinuousDistribution.java
index eea04aa..178fe4d 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractContinuousDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractContinuousDistribution.java
@@ -16,6 +16,8 @@
*/
package org.apache.commons.statistics.distribution;
+import java.util.function.DoubleBinaryOperator;
+import java.util.function.DoubleUnaryOperator;
import org.apache.commons.numbers.rootfinder.BrentSolver;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.distribution.InverseTransformContinuousSampler;
@@ -147,6 +149,39 @@ abstract class AbstractContinuousDistribution
*/
@Override
public double inverseCumulativeProbability(final double p) {
+ ArgumentUtils.checkProbability(p);
+ return inverseProbability(p, 1 - p, false);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * <p>The default implementation returns:
+ * <ul>
+ * <li>{@link #getSupportLowerBound()} for {@code p = 1},</li>
+ * <li>{@link #getSupportUpperBound()} for {@code p = 0}, or</li>
+ * <li>the result of a search for a root between the lower and upper bound using
+ * {@link #survivalProbability(double) survivalProbability(x) - p}.
+ * The bounds may be bracketed for efficiency.</li>
+ * </ul>
+ *
+ * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}
+ */
+ @Override
+ public double inverseSurvivalProbability(final double p) {
+ ArgumentUtils.checkProbability(p);
+ return inverseProbability(1 - p, p, true);
+ }
+
+ /**
+ * Implementation for the inverse cumulative or survival probability.
+ *
+ * @param p Cumulative probability.
+ * @param q Survival probability.
+ * @param complement Set to true to compute the inverse survival probability
+ * @return the value
+ */
+ private double inverseProbability(final double p, final double q, boolean complement) {
/* IMPLEMENTATION NOTES
* --------------------
* Where applicable, use is made of the one-sided Chebyshev inequality
@@ -173,17 +208,18 @@ abstract class AbstractContinuousDistribution
* In cases where the Chebyshev inequality does not apply, geometric
* progressions 1, 2, 4, ... and -1, -2, -4, ... are used to bracket
* the root.
+ *
+ * In the case of the survival probability the bracket can be set using the same
+ * bound given that the argument p = 1 - q, with q the survival probability.
*/
- ArgumentUtils.checkProbability(p);
double lowerBound = getSupportLowerBound();
+ double upperBound = getSupportUpperBound();
if (p == 0) {
- return lowerBound;
+ return complement ? upperBound : lowerBound;
}
-
- double upperBound = getSupportUpperBound();
- if (p == 1) {
- return upperBound;
+ if (q == 0) {
+ return complement ? lowerBound : upperBound;
}
final double mu = getMean();
@@ -193,55 +229,103 @@ abstract class AbstractContinuousDistribution
if (lowerBound == Double.NEGATIVE_INFINITY) {
if (chebyshevApplies) {
- lowerBound = mu - sig * Math.sqrt((1 - p) / p);
+ lowerBound = mu - sig * Math.sqrt(q / p);
}
// Bound may have been set as infinite
if (lowerBound == Double.NEGATIVE_INFINITY) {
lowerBound = Math.min(-1, upperBound);
- while (cumulativeProbability(lowerBound) >= p) {
- lowerBound *= 2;
+ if (complement) {
+ while (survivalProbability(lowerBound) < q) {
+ lowerBound *= 2;
+ }
+ } else {
+ while (cumulativeProbability(lowerBound) >= p) {
+ lowerBound *= 2;
+ }
}
+ // Ensure finite
+ lowerBound = Math.max(lowerBound, -Double.MAX_VALUE);
}
}
if (upperBound == Double.POSITIVE_INFINITY) {
if (chebyshevApplies) {
- upperBound = mu + sig * Math.sqrt(p / (1 - p));
+ upperBound = mu + sig * Math.sqrt(p / q);
}
// Bound may have been set as infinite
if (upperBound == Double.POSITIVE_INFINITY) {
upperBound = Math.max(1, lowerBound);
- while (cumulativeProbability(upperBound) < p) {
- upperBound *= 2;
+ if (complement) {
+ while (survivalProbability(upperBound) >= q) {
+ upperBound *= 2;
+ }
+ } else {
+ while (cumulativeProbability(upperBound) < p) {
+ upperBound *= 2;
+ }
}
+ // Ensure finite
+ upperBound = Math.min(upperBound, Double.MAX_VALUE);
}
}
+ final DoubleUnaryOperator fun = complement ?
+ arg -> survivalProbability(arg) - q :
+ arg -> cumulativeProbability(arg) - p;
final double x = new BrentSolver(SOLVER_RELATIVE_ACCURACY,
SOLVER_ABSOLUTE_ACCURACY,
SOLVER_FUNCTION_VALUE_ACCURACY)
- .findRoot(arg -> cumulativeProbability(arg) - p,
+ .findRoot(fun,
lowerBound,
0.5 * (lowerBound + upperBound),
upperBound);
if (!isSupportConnected()) {
- /* Test for plateau. */
- final double dx = SOLVER_ABSOLUTE_ACCURACY;
- if (x - dx >= lowerBound) {
- final double px = cumulativeProbability(x);
- if (cumulativeProbability(x - dx) == px) {
- upperBound = x;
- while (upperBound - lowerBound > dx) {
- final double midPoint = 0.5 * (lowerBound + upperBound);
- if (cumulativeProbability(midPoint) < px) {
- lowerBound = midPoint;
- } else {
- upperBound = midPoint;
- }
+ return searchPlateau(complement, lowerBound, x);
+ }
+ return x;
+ }
+
+ /**
+ * Test the probability function for a plateau at the point x. If detected
+ * search the plateau for the lowest point y such that
+ * {@code inf{y in R | P(y) == P(x)}}.
+ *
+ * <p>This function is used when the distribution support is not connected
+ * to satisfy the inverse probability requirements of {@link ContinuousDistribution}
+ * on the returned value.
+ *
+ * @param complement Set to true to search the survival probability.
+ * @param lower Lower bound used to limit the search downwards.
+ * @param x Current value.
+ * @return the infimum y
+ */
+ private double searchPlateau(boolean complement, double lower, final double x) {
+ /* Test for plateau. Lower the value x if the probability is the same. */
+ final double dx = SOLVER_ABSOLUTE_ACCURACY;
+ if (x - dx >= lower) {
+ final DoubleUnaryOperator fun = complement ?
+ this::survivalProbability :
+ this::cumulativeProbability;
+ final double px = fun.applyAsDouble(x);
+ if (fun.applyAsDouble(x - dx) == px) {
+ double upperBound = x;
+ double lowerBound = lower;
+ // Bisection search
+ // Require cdf(x) < px and sf(x) > px to move the lower bound
+ // to the midpoint.
+ final DoubleBinaryOperator cmp = complement ?
+ (a, b) -> a > b ? -1 : 1 :
+ (a, b) -> a < b ? -1 : 1;
+ while (upperBound - lowerBound > dx) {
+ final double midPoint = 0.5 * (lowerBound + upperBound);
+ if (cmp.applyAsDouble(fun.applyAsDouble(midPoint), px) < 0) {
+ lowerBound = midPoint;
+ } else {
+ upperBound = midPoint;
}
- return upperBound;
}
+ return upperBound;
}
}
return x;
diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistribution.java
index 3a3360f..9c7394a 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistribution.java
@@ -16,6 +16,7 @@
*/
package org.apache.commons.statistics.distribution;
+import java.util.function.IntUnaryOperator;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.distribution.InverseTransformDiscreteSampler;
@@ -113,64 +114,111 @@ abstract class AbstractDiscreteDistribution
@Override
public int inverseCumulativeProbability(final double p) {
ArgumentUtils.checkProbability(p);
+ return inverseProbability(p, 1 - p, false);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * <p>The default implementation returns:
+ * <ul>
+ * <li>{@link #getSupportLowerBound()} for {@code p = 1},</li>
+ * <li>{@link #getSupportUpperBound()} for {@code p = 0}, or</li>
+ * <li>the result of a search for a root between the lower and upper bound using
+ * {@link #survivalProbability(int) survivalProbability(x) - p}.
+ * The bounds may be bracketed for efficiency.</li>
+ * </ul>
+ *
+ * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}
+ */
+ @Override
+ public int inverseSurvivalProbability(final double p) {
+ ArgumentUtils.checkProbability(p);
+ return inverseProbability(1 - p, p, true);
+ }
+
+ /**
+ * Implementation for the inverse cumulative or survival probability.
+ *
+ * @param p Cumulative probability.
+ * @param q Survival probability.
+ * @param complement Set to true to compute the inverse survival probability
+ * @return the value
+ */
+ private int inverseProbability(final double p, final double q, boolean complement) {
int lower = getSupportLowerBound();
if (p == 0) {
return lower;
}
+ int upper = getSupportUpperBound();
+ if (q == 0) {
+ return upper;
+ }
+
+ // The binary search sets the upper value to the mid-point
+ // based on fun(x) >= 0. The upper value is returned.
+ //
+ // Create a function to search for x where the upper bound can be
+ // lowered if:
+ // cdf(x) >= p
+ // sf(x) <= q
+ final IntUnaryOperator fun = complement ?
+ x -> Double.compare(q, checkedProbability(survivalProbability(x))) :
+ x -> Double.compare(checkedProbability(cumulativeProbability(x)), p);
+
if (lower == Integer.MIN_VALUE) {
- if (checkedCumulativeProbability(lower) >= p) {
+ if (fun.applyAsInt(lower) >= 0) {
return lower;
}
} else {
- lower -= 1; // this ensures cumulativeProbability(lower) < p, which
- // is important for the solving step
- }
-
- int upper = getSupportUpperBound();
- if (p == 1) {
- return upper;
+ // this ensures:
+ // cumulativeProbability(lower) < p
+ // survivalProbability(lower) > q
+ // which is important for the solving step
+ lower -= 1;
}
// use the one-sided Chebyshev inequality to narrow the bracket
- // cf. AbstractRealDistribution.inverseCumulativeProbability(double)
+ // cf. AbstractContinuousDistribution.inverseCumulativeProbability(double)
final double mu = getMean();
- final double sigma = Math.sqrt(getVariance());
+ final double sig = Math.sqrt(getVariance());
final boolean chebyshevApplies = Double.isFinite(mu) &&
- Double.isFinite(sigma) &&
- sigma != 0.0;
+ ArgumentUtils.isFiniteStrictlyPositive(sig);
if (chebyshevApplies) {
- double k = Math.sqrt((1.0 - p) / p);
- double tmp = mu - k * sigma;
+ double tmp = mu - sig * Math.sqrt(q / p);
if (tmp > lower) {
lower = ((int) Math.ceil(tmp)) - 1;
}
- k = 1.0 / k;
- tmp = mu + k * sigma;
+ tmp = mu + sig * Math.sqrt(p / q);
if (tmp < upper) {
upper = ((int) Math.ceil(tmp)) - 1;
}
}
- return solveInverseCumulativeProbability(p, lower, upper);
+ // TODO
+ // Improve the simple bisection to use a faster search,
+ // e.g. a BrentSolver.
+
+ return solveInverseProbability(fun, lower, upper);
}
/**
* This is a utility function used by {@link
- * #inverseCumulativeProbability(double)}. It assumes {@code 0 < p < 1} and
- * that the inverse cumulative probability lies in the bracket {@code
+ * #inverseProbability(double, double, boolean)}. It assumes
+ * that the inverse probability lies in the bracket {@code
* (lower, upper]}. The implementation does simple bisection to find the
- * smallest {@code p}-quantile {@code inf{x in Z | P(X <= x) >= p}}.
+ * smallest {@code x} such that {@code fun(x) >= 0}.
*
- * @param p Cumulative probability.
- * @param lowerBound Value satisfying {@code cumulativeProbability(lower) < p}.
- * @param upperBound Value satisfying {@code p <= cumulativeProbability(upper)}.
- * @return the smallest {@code p}-quantile of this distribution.
+ * @param fun Probability function.
+ * @param lowerBound Value satisfying {@code fun(lower) < 0}.
+ * @param upperBound Value satisfying {@code fun(upper) >= 0}.
+ * @return the smallest x
*/
- private int solveInverseCumulativeProbability(final double p,
- int lowerBound,
- int upperBound) {
+ private static int solveInverseProbability(IntUnaryOperator fun,
+ int lowerBound,
+ int upperBound) {
// Use long to prevent overflow during computation of the middle
long lower = lowerBound;
long upper = upperBound;
@@ -180,28 +228,30 @@ abstract class AbstractDiscreteDistribution
// lower and upper arguments of this method are positive, for
// example, for PoissonDistribution.
final long middle = (lower + upper) / 2;
- final double pm = checkedCumulativeProbability((int) middle);
- if (pm >= p) {
- upper = middle;
- } else {
+ final double pm = fun.applyAsInt((int) middle);
+ if (pm < 0) {
lower = middle;
+ } else {
+ upper = middle;
}
}
return (int) upper;
}
/**
- * Computes the cumulative probability function and checks for {@code NaN}
- * values returned. Rethrows any exception encountered evaluating the cumulative
- * probability function. Throws {@code IllegalStateException} if the cumulative
+ * Checks the probability function result for {@code NaN}.
+ * Throws {@code IllegalStateException} if the cumulative
* probability function returns {@code NaN}.
*
- * @param argument Input value.
- * @return the cumulative probability.
- * @throws IllegalStateException if the cumulative probability is {@code NaN}.
+ * <p>TODO: Q. Is this required? The bisection search will
+ * eventually return if NaN is computed. Check the origin
+ * of this in Commons Math. Its origins may be for debugging.
+ *
+ * @param result Probability
+ * @return the probability.
+ * @throws IllegalStateException if the probability is {@code NaN}.
*/
- private double checkedCumulativeProbability(int argument) {
- final double result = cumulativeProbability(argument);
+ private static double checkedProbability(double result) {
if (Double.isNaN(result)) {
throw new IllegalStateException("Internal error");
}
diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ContinuousDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ContinuousDistribution.java
index ec9493b..1b1c425 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ContinuousDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ContinuousDistribution.java
@@ -86,8 +86,8 @@ public interface ContinuousDistribution {
* to this distribution, this method returns {@code P(X > x)}.
* In other words, this method represents the complementary cumulative
* distribution function.
- * <p>
- * By default, this is defined as {@code 1 - cumulativeProbability(x)}, but
+ *
+ * <p>By default, this is defined as {@code 1 - cumulativeProbability(x)}, but
* the specific implementation may be more accurate.
*
* @param x Point at which the survival function is evaluated.
@@ -115,6 +115,27 @@ public interface ContinuousDistribution {
double inverseCumulativeProbability(double p);
/**
+ * Computes the inverse survival probability function of this distribution. For a random
+ * variable {@code X} distributed according to this distribution, the
+ * returned value is
+ * <ul>
+ * <li>{@code inf{x in R | P(X>=x) <= p}} for {@code 0 <= p < 1},</li>
+ * <li>{@code inf{x in R | P(X>=x) < 1}} for {@code p = 1}.</li>
+ * </ul>
+ *
+ * <p>By default, this is defined as {@code inverseCumulativeProbability(1 - p)}, but
+ * the specific implementation may be more accurate.
+ *
+ * @param p Survival probability.
+ * @return the smallest {@code (1-p)}-quantile of this distribution
+ * (largest 0-quantile for {@code p = 1}).
+ * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
+ */
+ default double inverseSurvivalProbability(double p) {
+ return inverseCumulativeProbability(1 - p);
+ }
+
+ /**
* Gets the mean of this distribution.
*
* @return the mean, or {@code Double.NaN} if it is not defined.
diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DiscreteDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DiscreteDistribution.java
index 05681a1..7cfcb4e 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DiscreteDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DiscreteDistribution.java
@@ -95,8 +95,8 @@ public interface DiscreteDistribution {
* to this distribution, this method returns {@code P(X > x)}.
* In other words, this method represents the complementary cumulative
* distribution function.
- * <p>
- * By default, this is defined as {@code 1 - cumulativeProbability(x)}, but
+ *
+ * <p>By default, this is defined as {@code 1 - cumulativeProbability(x)}, but
* the specific implementation may be more accurate.
*
* @param x Point at which the survival function is evaluated.
@@ -115,7 +115,7 @@ public interface DiscreteDistribution {
* <li>{@code inf{x in Z | P(X<=x) >= p}} for {@code 0 < p <= 1},</li>
* <li>{@code inf{x in Z | P(X<=x) > 0}} for {@code p = 0}.</li>
* </ul>
- * If the result exceeds the range of the data type {@code int},
+ * <p>If the result exceeds the range of the data type {@code int},
* then {@code Integer.MIN_VALUE} or {@code Integer.MAX_VALUE} is returned.
* In this case the result of {@link #cumulativeProbability(int)} called
* using the returned {@code p}-quantile may not compute the original {@code p}.
@@ -128,6 +128,31 @@ public interface DiscreteDistribution {
int inverseCumulativeProbability(double p);
/**
+ * Computes the inverse survival probability function of this distribution.
+ * For a random variable {@code X} distributed according to this distribution,
+ * the returned value is
+ * <ul>
+ * <li>{@code inf{x in R | P(X>=x) <= p}} for {@code 0 <= p < 1},</li>
+ * <li>{@code inf{x in R | P(X>=x) < 1}} for {@code p = 1}.</li>
+ * </ul>
+ * <p>If the result exceeds the range of the data type {@code int},
+ * then {@code Integer.MIN_VALUE} or {@code Integer.MAX_VALUE} is returned.
+ * In this case the result of {@link #survivalProbability(int)} called
+ * using the returned {@code (1-p)}-quantile may not compute the original {@code p}.
+ *
+ * <p>By default, this is defined as {@code inverseCumulativeProbability(1 - p)}, but
+ * the specific implementation may be more accurate.
+ *
+ * @param p Cumulative probability.
+ * @return the smallest {@code (1-p)}-quantile of this distribution
+ * (largest 0-quantile for {@code p = 1}).
+ * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
+ */
+ default int inverseSurvivalProbability(double p) {
+ return inverseCumulativeProbability(1 - p);
+ }
+
+ /**
* Gets the mean of this distribution.
*
* @return the mean, or {@code Double.NaN} if it is not defined.
diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/AbstractContinuousDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/AbstractContinuousDistributionTest.java
index 463d4f3..a43395d 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/AbstractContinuousDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/AbstractContinuousDistributionTest.java
@@ -22,9 +22,12 @@ import org.apache.commons.math3.analysis.integration.UnivariateIntegrator;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-/** Various tests related to MATH-699. */
+/**
+ * Test cases for AbstractContinuousDistribution default implementations.
+ */
class AbstractContinuousDistributionTest {
+ /** Various tests related to MATH-699. */
@Test
void testContinuous() {
final double x0 = 0.0;
@@ -95,11 +98,34 @@ class AbstractContinuousDistributionTest {
return false;
}
};
- final double expected = x1;
- final double actual = distribution.inverseCumulativeProbability(p12);
- Assertions.assertEquals(expected, actual, 1e-8);
+ // CDF is continuous before x1 and after x2. Plateau in between:
+ // CDF(x1 <= X <= x2) = p12.
+ // The inverse returns the infimum.
+ double expected = x1;
+ double actual = distribution.inverseCumulativeProbability(p12);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse CDF");
+
+ actual = distribution.inverseSurvivalProbability(1 - p12);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse SF");
+
+ // Test the continuous region
+ expected = 0.5 * (x1 - x0);
+ actual = distribution.inverseCumulativeProbability(p12 * 0.5);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse CDF");
+
+ actual = distribution.inverseSurvivalProbability(1 - p12 * 0.5);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse SF");
+
+ // Edge case where the result is within the solver accuracy of the lower bound
+ expected = x0;
+ actual = distribution.inverseCumulativeProbability(Double.MIN_VALUE);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse CDF");
+
+ actual = distribution.inverseSurvivalProbability(Math.nextDown(1.0));
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse SF");
}
+ /** Various tests related to MATH-699. */
@Test
void testDiscontinuous() {
final double x0 = 0.0;
@@ -177,9 +203,31 @@ class AbstractContinuousDistributionTest {
return false;
}
};
- final double expected = x2;
- final double actual = distribution.inverseCumulativeProbability(p23);
- Assertions.assertEquals(expected, actual, 1e-8);
+ // CDF continuous before x1 and after x3. Two plateuas in between stepped at x2:
+ // CDF(x1 <= X <= x2) = p12.
+ // CDF(x2 <= X <= x3) = p23. The inverse returns the infimum.
+ double expected = x2;
+ double actual = distribution.inverseCumulativeProbability(p23);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse CDF");
+
+ actual = distribution.inverseSurvivalProbability(1 - p23);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse SF");
+
+ // Test the continuous region
+ expected = 0.5 * (x1 - x0);
+ actual = distribution.inverseCumulativeProbability(p12 * 0.5);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse CDF");
+
+ actual = distribution.inverseSurvivalProbability(1 - p12 * 0.5);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse SF");
+
+ // Edge case where the result is within the solver accuracy of the lower bound
+ expected = x0;
+ actual = distribution.inverseCumulativeProbability(Double.MIN_VALUE);
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse CDF");
+
+ actual = distribution.inverseSurvivalProbability(Math.nextDown(1.0));
+ Assertions.assertEquals(expected, actual, 1e-8, "Inverse SF");
}
/**
@@ -208,7 +256,7 @@ class AbstractContinuousDistributionTest {
@Override
public double density(final double x) {
- return x == x0 ? Double.POSITIVE_INFINITY : 0.0;
+ throw new AssertionError();
}
@Override
@@ -236,11 +284,99 @@ class AbstractContinuousDistributionTest {
return true;
}
};
- final double x = distribution.inverseCumulativeProbability(0.5);
+ double x = distribution.inverseCumulativeProbability(0.5);
// The value can be anything other than x0
- Assertions.assertNotEquals(x0, x);
+ Assertions.assertNotEquals(x0, x, "Inverse CDF");
// Ideally it would be the next value after x0 but accuracy is dependent
// on the tolerance of the solver
- Assertions.assertEquals(x0, x, 1e-8);
+ Assertions.assertEquals(x0, x, 1e-8, "Inverse CDF");
+
+ // The same functionality should be supported for the inverse survival probability
+ x = distribution.inverseSurvivalProbability(0.5);
+ Assertions.assertNotEquals(x0, x, "Inverse SF");
+ Assertions.assertEquals(x0, x, 1e-8, "Inverse SF");
+ }
+
+ /**
+ * Test infinite variance. This invalidates the Chebyshev inequality.
+ *
+ * <p>This test verifies the solver reverts to manual bracketing and raises no exception.
+ */
+ @Test
+ void testInfiniteVariance() {
+ // Distribution must have an infinite bound and infinite variance.
+ // This is an invalid case for the Chebyshev inequality.
+
+ // Create a triangle distribution: (a, c, b); a=lower, c=mode, b=upper
+ // (-10, 0, 10)
+ // Area of the first triangle [-10, 0] is set assuming the height is 10
+ // => 10 * 10 * 2 / 2 = 100
+ // Length of triangle to achieve half the area:
+ // x = sqrt(50) = 7.07..
+
+ final AbstractContinuousDistribution distribution;
+ distribution = new AbstractContinuousDistribution() {
+ @Override
+ public double cumulativeProbability(final double x) {
+ if (x > 0) {
+ // Use symmetry for the upper triangle
+ return 1 - cumulativeProbability(-x);
+ }
+ if (x < -10) {
+ return 0;
+ }
+ return Math.pow(x + 10, 2) / 200;
+ }
+
+ @Override
+ public double density(final double x) {
+ throw new AssertionError();
+ }
+
+ @Override
+ public double getMean() {
+ return 0;
+ }
+
+ @Override
+ public double getVariance() {
+ // Report variance incorrectly
+ return Double.POSITIVE_INFINITY;
+ }
+
+ @Override
+ public double getSupportLowerBound() {
+ // Report lower bound incorrectly (it should be -10) to test cdf(0)
+ return Double.NEGATIVE_INFINITY;
+ }
+
+ @Override
+ public double getSupportUpperBound() {
+ // Report upper bound incorrectly (it should be 10) to test cdf(1)
+ return Double.POSITIVE_INFINITY;
+ }
+
+ @Override
+ public boolean isSupportConnected() {
+ return true;
+ }
+ };
+
+ // Accuracy is dependent on the tolerance of the solver
+ final double tolerance = 1e-8;
+
+ final double x = Math.sqrt(50);
+ Assertions.assertEquals(Double.NEGATIVE_INFINITY, distribution.inverseCumulativeProbability(0), "Inverse CDF");
+ Assertions.assertEquals(x - 10, distribution.inverseCumulativeProbability(0.25), tolerance, "Inverse CDF");
+ Assertions.assertEquals(0, distribution.inverseCumulativeProbability(0.5), tolerance, "Inverse CDF");
+ Assertions.assertEquals(10 - x, distribution.inverseCumulativeProbability(0.75), tolerance, "Inverse CDF");
+ Assertions.assertEquals(Double.POSITIVE_INFINITY, distribution.inverseCumulativeProbability(1), "Inverse CDF");
+
+ // The same functionality should be supported for the inverse survival probability
+ Assertions.assertEquals(Double.NEGATIVE_INFINITY, distribution.inverseSurvivalProbability(1), "Inverse CDF");
+ Assertions.assertEquals(x - 10, distribution.inverseSurvivalProbability(0.75), tolerance, "Inverse SF");
+ Assertions.assertEquals(0, distribution.inverseSurvivalProbability(0.5), tolerance, "Inverse SF");
+ Assertions.assertEquals(10 - x, distribution.inverseSurvivalProbability(0.25), tolerance, "Inverse SF");
+ Assertions.assertEquals(Double.POSITIVE_INFINITY, distribution.inverseSurvivalProbability(0), "Inverse CDF");
}
}
diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistributionTest.java
index 3a49ff3..d27bdfb 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistributionTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.commons.statistics.distribution;
+import java.util.stream.IntStream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -24,40 +25,53 @@ import org.junit.jupiter.api.Test;
*/
class AbstractDiscreteDistributionTest {
private final DiceDistribution diceDistribution = new DiceDistribution();
- private final double p = diceDistribution.probability(1);
@Test
void testInverseCumulativeProbabilityMethod() {
- final double precision = 0.000000000000001;
+ // This must be consistent with its own cumulative probability
+ final double[] p = IntStream.rangeClosed(1, 6).mapToDouble(diceDistribution::cumulativeProbability).toArray();
+ Assertions.assertEquals(1.0, p[5], "Incorrect cumulative probability at upper bound");
Assertions.assertEquals(1, diceDistribution.inverseCumulativeProbability(0));
- Assertions.assertEquals(1, diceDistribution.inverseCumulativeProbability((1d - Double.MIN_VALUE) / 6d));
- Assertions.assertEquals(2, diceDistribution.inverseCumulativeProbability((1d + precision) / 6d));
- Assertions.assertEquals(2, diceDistribution.inverseCumulativeProbability((2d - Double.MIN_VALUE) / 6d));
- Assertions.assertEquals(3, diceDistribution.inverseCumulativeProbability((2d + precision) / 6d));
- Assertions.assertEquals(3, diceDistribution.inverseCumulativeProbability((3d - Double.MIN_VALUE) / 6d));
- Assertions.assertEquals(4, diceDistribution.inverseCumulativeProbability((3d + precision) / 6d));
- Assertions.assertEquals(4, diceDistribution.inverseCumulativeProbability((4d - Double.MIN_VALUE) / 6d));
- Assertions.assertEquals(5, diceDistribution.inverseCumulativeProbability((4d + precision) / 6d));
- Assertions.assertEquals(5, diceDistribution.inverseCumulativeProbability((5d - precision) / 6d)); //Can't use Double.MIN
- Assertions.assertEquals(6, diceDistribution.inverseCumulativeProbability((5d + precision) / 6d));
- Assertions.assertEquals(6, diceDistribution.inverseCumulativeProbability((6d - precision) / 6d)); //Can't use Double.MIN
- Assertions.assertEquals(6, diceDistribution.inverseCumulativeProbability(1d));
+ for (int i = 0; i < 6; i++) {
+ final int x = i + 1;
+ Assertions.assertEquals(x, diceDistribution.inverseCumulativeProbability(Math.nextDown(p[i])));
+ Assertions.assertEquals(x, diceDistribution.inverseCumulativeProbability(p[i]));
+ if (x < 6) {
+ Assertions.assertEquals(x + 1, diceDistribution.inverseCumulativeProbability(Math.nextUp(p[i])));
+ }
+ }
+ }
+
+ @Test
+ void testInverseSurvivalProbabilityMethod() {
+ // This must be consistent with its own survival probability
+ final double[] p = IntStream.rangeClosed(1, 6).mapToDouble(diceDistribution::survivalProbability).toArray();
+ Assertions.assertEquals(0.0, p[5], "Incorrect survival probability at upper bound");
+ Assertions.assertEquals(1, diceDistribution.inverseSurvivalProbability(1));
+ for (int i = 0; i < 6; i++) {
+ final int x = i + 1;
+ Assertions.assertEquals(x, diceDistribution.inverseSurvivalProbability(Math.nextUp(p[i])));
+ Assertions.assertEquals(x, diceDistribution.inverseSurvivalProbability(p[i]));
+ if (x < 6) {
+ Assertions.assertEquals(x + 1, diceDistribution.inverseSurvivalProbability(Math.nextDown(p[i])));
+ }
+ }
}
@Test
void testCumulativeProbabilitiesSingleArguments() {
- for (int i = 1; i < 7; i++) {
+ final double p = diceDistribution.probability(1);
+ for (int i = 1; i <= 6; i++) {
Assertions.assertEquals(p * i,
- diceDistribution.cumulativeProbability(i), Double.MIN_VALUE);
+ diceDistribution.cumulativeProbability(i), Math.ulp(p * i));
}
- Assertions.assertEquals(0.0,
- diceDistribution.cumulativeProbability(0), Double.MIN_VALUE);
- Assertions.assertEquals(1.0,
- diceDistribution.cumulativeProbability(7), Double.MIN_VALUE);
+ Assertions.assertEquals(0.0, diceDistribution.cumulativeProbability(0));
+ Assertions.assertEquals(1.0, diceDistribution.cumulativeProbability(7));
}
@Test
void testProbabilitiesRangeArguments() {
+ final double p = diceDistribution.probability(1);
int lower = 0;
int upper = 6;
for (int i = 0; i < 2; i++) {
@@ -76,22 +90,34 @@ class AbstractDiscreteDistributionTest {
void testInverseCumulativeProbabilityExtremes() {
// Require a lower bound of MIN_VALUE and the cumulative probability
// at that bound to be lower/higher than the argument cumulative probability.
+ // Use a uniform distribution so that the default search is supported
+ // for the inverse.
final DiscreteDistribution dist = new AbstractDiscreteDistribution() {
@Override
public double probability(int x) {
- return 0;
+ throw new AssertionError();
}
@Override
public double cumulativeProbability(int x) {
- return x == Integer.MIN_VALUE ? 0.1 : 1.0;
+ final int y = x - Integer.MIN_VALUE;
+ if (y == 0) {
+ return 0.25;
+ } else if (y == 1) {
+ return 0.5;
+ } else if (y == 2) {
+ return 0.75;
+ } else {
+ return 1.0;
+ }
}
@Override
public double getMean() {
- return 0;
+ return 1.5 + Integer.MIN_VALUE;
}
@Override
public double getVariance() {
- return 0;
+ // n = upper - lower + 1, the variance is (n^2 - 1) / 12
+ return 15.0 / 12;
}
@Override
public int getSupportLowerBound() {
@@ -99,19 +125,83 @@ class AbstractDiscreteDistributionTest {
}
@Override
public int getSupportUpperBound() {
- return 42;
+ return Integer.MIN_VALUE + 3;
}
@Override
public boolean isSupportConnected() {
- return false;
+ throw new AssertionError();
}
};
+ Assertions.assertEquals(dist.getSupportLowerBound(), dist.inverseCumulativeProbability(0.0));
Assertions.assertEquals(Integer.MIN_VALUE, dist.inverseCumulativeProbability(0.05));
+ Assertions.assertEquals(Integer.MIN_VALUE + 1, dist.inverseCumulativeProbability(0.35));
+ Assertions.assertEquals(Integer.MIN_VALUE + 2, dist.inverseCumulativeProbability(0.55));
Assertions.assertEquals(dist.getSupportUpperBound(), dist.inverseCumulativeProbability(1.0));
+
+ Assertions.assertEquals(dist.getSupportLowerBound(), dist.inverseSurvivalProbability(1.0));
+ Assertions.assertEquals(Integer.MIN_VALUE, dist.inverseSurvivalProbability(0.95));
+ Assertions.assertEquals(Integer.MIN_VALUE + 1, dist.inverseSurvivalProbability(0.65));
+ Assertions.assertEquals(Integer.MIN_VALUE + 2, dist.inverseSurvivalProbability(0.45));
+ Assertions.assertEquals(dist.getSupportUpperBound(), dist.inverseSurvivalProbability(0.0));
+ }
+
+ @Test
+ void testInverseCumulativeProbabilityWithNoMean() {
+ // A NaN mean will invalidate the Chebyshev inequality
+ // to prevent bracketing
+ final DiscreteDistribution dist = new AbstractDiscreteDistribution() {
+ @Override
+ public double probability(int x) {
+ throw new AssertionError();
+ }
+ @Override
+ public double cumulativeProbability(int x) {
+ if (x == 0) {
+ return 0.25;
+ } else if (x == 1) {
+ return 0.5;
+ } else if (x == 2) {
+ return 0.75;
+ } else {
+ return 1.0;
+ }
+ }
+ @Override
+ public double getMean() {
+ return Double.NaN;
+ }
+ @Override
+ public double getVariance() {
+ return Double.NaN;
+ }
+ @Override
+ public int getSupportLowerBound() {
+ return 0;
+ }
+ @Override
+ public int getSupportUpperBound() {
+ return 3;
+ }
+ @Override
+ public boolean isSupportConnected() {
+ throw new AssertionError();
+ }
+ };
+ Assertions.assertEquals(dist.getSupportLowerBound(), dist.inverseCumulativeProbability(0.0));
+ Assertions.assertEquals(0, dist.inverseCumulativeProbability(0.05));
+ Assertions.assertEquals(1, dist.inverseCumulativeProbability(0.35));
+ Assertions.assertEquals(2, dist.inverseCumulativeProbability(0.55));
+ Assertions.assertEquals(dist.getSupportUpperBound(), dist.inverseCumulativeProbability(1.0));
+
+ Assertions.assertEquals(dist.getSupportLowerBound(), dist.inverseSurvivalProbability(1.0));
+ Assertions.assertEquals(0, dist.inverseSurvivalProbability(0.95));
+ Assertions.assertEquals(1, dist.inverseSurvivalProbability(0.65));
+ Assertions.assertEquals(2, dist.inverseSurvivalProbability(0.45));
+ Assertions.assertEquals(dist.getSupportUpperBound(), dist.inverseSurvivalProbability(0.0));
}
@Test
- void testInverseCumulativeProbabilityWithNaN() {
+ void testInverseProbabilityWithNaN() {
final DiscreteDistribution dist = new AbstractDiscreteDistribution() {
@Override
public double probability(int x) {
@@ -144,6 +234,7 @@ class AbstractDiscreteDistributionTest {
}
};
Assertions.assertThrows(IllegalStateException.class, () -> dist.inverseCumulativeProbability(0.5));
+ Assertions.assertThrows(IllegalStateException.class, () -> dist.inverseSurvivalProbability(0.5));
}
/**
@@ -168,7 +259,7 @@ class AbstractDiscreteDistributionTest {
} else if (x >= 6) {
return 1;
} else {
- return p * x;
+ return x / 6d;
}
}
diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ContinuousDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ContinuousDistributionTest.java
index 8158a16..8e6d1f5 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ContinuousDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ContinuousDistributionTest.java
@@ -39,7 +39,8 @@ class ContinuousDistributionTest {
}
@Override
public double inverseCumulativeProbability(double p) {
- return 0;
+ // For the default inverseSurvivalProbability(double) method
+ return 10 * p;
}
@Override
public double getVariance() {
@@ -82,5 +83,10 @@ class ContinuousDistributionTest {
// Should throw for bad range
Assertions.assertThrows(DistributionException.class, () -> dist.probability(0.5, 0.4));
Assertions.assertEquals(high - low, dist.probability(0.5, 1.5));
+ Assertions.assertEquals(high - low, dist.probability(0.5, 1.5));
+ for (final double p : new double[] {0.2, 0.5, 0.7}) {
+ Assertions.assertEquals(dist.inverseCumulativeProbability(1 - p),
+ dist.inverseSurvivalProbability(p));
+ }
}
}
diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/DiscreteDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/DiscreteDistributionTest.java
index 90437a7..17a261c 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/DiscreteDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/DiscreteDistributionTest.java
@@ -45,6 +45,12 @@ class DiscreteDistributionTest {
}
return x > 5 ? 1.0 : 0.75;
}
+
+ @Override
+ public int inverseCumulativeProbability(double p) {
+ // For the default inverseSurvivalProbability(double) method
+ return (int) (10 * p);
+ }
};
for (final int x : new int[] {Integer.MIN_VALUE, -1, 0, 1, 2, Integer.MAX_VALUE}) {
@@ -53,6 +59,11 @@ class DiscreteDistributionTest {
// Must return 1 - CDF(x)
Assertions.assertEquals(1.0 - dist.cumulativeProbability(x), dist.survivalProbability(x));
}
+
+ for (final double p : new double[] {0.2, 0.5, 0.7}) {
+ Assertions.assertEquals(dist.inverseCumulativeProbability(1 - p),
+ dist.inverseSurvivalProbability(p));
+ }
}
/**
diff --git a/src/main/resources/pmd/pmd-ruleset.xml b/src/main/resources/pmd/pmd-ruleset.xml
index a8e3fa8..e4f6059 100644
--- a/src/main/resources/pmd/pmd-ruleset.xml
+++ b/src/main/resources/pmd/pmd-ruleset.xml
@@ -109,6 +109,13 @@
value="//ClassOrInterfaceDeclaration[@Image='AbstractContinuousDistribution']"/>
</properties>
</rule>
+ <rule ref="category/java/design.xml/ExcessiveMethodLength">
+ <properties>
+ <!-- Method length is due to comments. -->
+ <property name="violationSuppressXPath"
+ value="//ClassOrInterfaceDeclaration[@Image='AbstractContinuousDistribution']"/>
+ </properties>
+ </rule>
<rule ref="category/java/errorprone.xml/AvoidLiteralsInIfCondition">
<properties>