You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2022/11/22 17:54:24 UTC

[commons-statistics] branch master updated (b6c6c72 -> af90339)

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git


    from b6c6c72  Use MathJax for inverse probability definition
     new f099867  Update supported bounds to use MathJax
     new 8b0187a  Add javadoc link to the Commons RNG project
     new 17ad2b9  Correct javadoc parameter names
     new 857cff2  Removed TODO comment in discrete distribution inverse probability method
     new af90339  Extra density values for chi2 distribution with large degrees of freedom

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../distribution/AbstractDiscreteDistribution.java |  4 --
 .../distribution/ContinuousDistribution.java       |  4 +-
 .../distribution/DiscreteDistribution.java         |  4 +-
 .../distribution/HypergeometricDistribution.java   | 43 ++++++------
 .../distribution/ChiSquaredDistributionTest.java   | 81 +++++++++++++++++++---
 pom.xml                                            |  2 +
 6 files changed, 98 insertions(+), 40 deletions(-)


[commons-statistics] 05/05: Extra density values for chi2 distribution with large degrees of freedom

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit af90339b6c810d8247844023c9ea9e980552930f
Author: aherbert <ah...@apache.org>
AuthorDate: Tue Nov 22 17:53:08 2022 +0000

    Extra density values for chi2 distribution with large degrees of freedom
---
 .../distribution/ChiSquaredDistributionTest.java   | 81 +++++++++++++++++++---
 1 file changed, 70 insertions(+), 11 deletions(-)

diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ChiSquaredDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ChiSquaredDistributionTest.java
index 6917a97..a979b14 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ChiSquaredDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ChiSquaredDistributionTest.java
@@ -55,13 +55,14 @@ class ChiSquaredDistributionTest extends BaseContinuousDistributionTest {
 
     @ParameterizedTest
     @MethodSource
-    void testAdditionalDensity(double df, double[] points, double[] values) {
-        // Values have many digits above the decimal point so use relative tolerance
-        final DoubleTolerance tol = createRelTolerance(5e-14);
+    void testAdditionalDensity(double df, double[] points, double[] values, DoubleTolerance tol) {
         testDensity(ChiSquaredDistribution.of(df), points, values, tol);
     }
 
     static Stream<Arguments> testAdditionalDensity() {
+        // Values have many digits above the decimal point so use relative tolerance
+        final DoubleTolerance tol = DoubleTolerances.relative(5e-14);
+
         // R 2.5:
         // x <- c(-0.1, 1e-6, 0.5, 1, 2, 5)
         final double[] x = new double[]{-0.1, 1e-6, 0.5, 1, 2, 5};
@@ -69,27 +70,85 @@ class ChiSquaredDistributionTest extends BaseContinuousDistributionTest {
             // print(dchisq(x, df=1), digits=17)
             Arguments.of(1, x, new double[] {
                 0, 398.942080930342626743, 0.439391289467722435, 0.241970724519143365,
-                0.103776874355148693, 0.014644982561926489}),
+                0.103776874355148693, 0.014644982561926489}, tol),
             // print(dchisq(x, df=0.1), digits=17)
             Arguments.of(0.1, x, new double[] {
                 0, 2.4864539972849805e+04, 7.4642387316120481e-02,
-                3.0090777182393683e-02, 9.4472991589506262e-03, 8.8271993957607896e-04}),
+                3.0090777182393683e-02, 9.4472991589506262e-03, 8.8271993957607896e-04}, tol),
             // print(dchisq(x, df=2), digits=17)
             Arguments.of(2, x, new double[] {
                 0, 0.49999975000006253, 0.38940039153570244,
-                0.30326532985631671, 0.18393972058572117, 0.04104249931194940}),
+                0.30326532985631671, 0.18393972058572117, 0.04104249931194940}, tol),
             // print(dchisq(x, df=10), digits=17)
             Arguments.of(10, x, new double[] {
                 0, 1.3020826822918329e-27, 6.3378969976514082e-05,
-                7.8975346316749191e-04, 7.6641550244050524e-03, 6.6800942890542614e-02}),
+                7.8975346316749191e-04, 7.6641550244050524e-03, 6.6800942890542614e-02}, tol),
             // print(dchisq(x, df=100), digits=17)
             Arguments.of(100, x, new double[] {
                 0, 0.0000000000000000e+00, 2.0200026568141969e-93,
-                8.8562141121618944e-79, 3.0239224849774644e-64, 2.1290671364111626e-45})
+                8.8562141121618944e-79, 3.0239224849774644e-64, 2.1290671364111626e-45}, tol),
+
+            // Progressively larger degrees of freedom (df) with values around the mean (df).
+            // Note that the CDF tends towards a step function (0 to 1) around the mean; the
+            // density is tiny and the computation has large cancellation leading to inaccuracy.
+            // R's dchisq and MATLAB's chi2pdf agree closely with each other, and their error
+            // relative to the scipy result is similar to that of the current Java code.
 
-            // TODO:
-            // Add more density checks with large DF and x points around the mean
-            // and into overflow for the underlying Gamma distribution.
+            // scipy.stats 1.9.1
+            // chi2.pdf([250, 500, 1000, 1500, 2000, 2500], 1000)
+            Arguments.of(1000,
+                new double[] {250, 500, 1000, 1500, 2000, 2500},
+                new double[] {
+                    2.4144472784936886e-140, 2.0416219443308211e-044,
+                    8.9191339347531283e-003, 1.7629519620803219e-023,
+                    1.0400388688836408e-069, 6.3317766286480137e-130
+                }, DoubleTolerances.relative(5e-13)),
+            // chi2.pdf([7000, 8000, 9000, 10000, 11000, 12000, 14000], 10000)
+            Arguments.of(10000,
+                new double[] {7000, 8000, 9000, 10000, 11000, 12000, 14000},
+                new double[] {
+                    3.4451863344803051e-126, 1.9575583029092260e-053,
+                    7.1768433769351565e-015, 2.8209009023369056e-003,
+                    1.6794999068429000e-013, 9.6151246453108889e-042,
+                    2.2671193242727422e-141
+                }, DoubleTolerances.relative(5e-12)),
+            // chi2.pdf([90000, 93000, 97000, 100000, 103000, 106000, 110000], 100000)
+            Arguments.of(100000,
+                new double[] {90000, 93000, 97000, 100000, 103000, 106000, 110000},
+                new double[] {
+                    3.9267505859463047e-120, 1.4455254881569810e-059,
+                    9.8188082087821045e-014, 8.9206057128873189e-004,
+                    2.2754616642908043e-013, 2.1622505071910235e-041,
+                    1.1771994592645341e-105
+                }, DoubleTolerances.relative(2e-10)),
+            // chi2.pdf([970000, 980000, 990000, 1000000, 1010000, 1020000, 1030000], 1000000)
+            Arguments.of(1000000,
+                new double[] {970000, 980000, 990000, 1000000, 1010000, 1020000, 1030000},
+                new double[] {
+                    5.5973818150499247e-104, 2.7658855125007757e-048,
+                    3.3455543700508753e-015, 2.8209474455352760e-004,
+                    4.5767314194128692e-015, 3.8269849102866545e-047,
+                    4.2922183625107689e-100
+                }, DoubleTolerances.relative(1e-9)),
+            // chi2.pdf([9889000, 9890000, 9900000, 10000000, 10100000, 10150000], 10000000)
+            Arguments.of(10000000,
+                new double[] {9889000, 9890000, 9900000, 10000000, 10100000, 10150000},
+                new double[] {
+                    1.5256172522921843e-139, 4.0708897874504053e-137,
+                    4.4858296398343578e-114, 8.9206205026501391e-005,
+                    1.2327974895005824e-112, 1.1722829145179983e-246
+                }, DoubleTolerances.relative(2e-8)),
+            // chi2.pdf([1e9-1e7, 1e9-1e6-1e5, 1e9-1e6-1e3, 1e9-1e6, 1e9, 1e9+1e6, 1e9+1e6+1e3, 1e9+1e6+1e5, 1e9+1e7], 1e9)
+            Arguments.of(1e9,
+                new double[] {1e9 - 1e7, 1e9 - 1e6 - 1e5, 1e9 - 1e6 - 1e3, 1e9 - 1e6, 1e9,
+                    1e9 + 1e6, 1e9 + 1e6 + 1e3, 1e9 + 1e6 + 1e5, 1e9 + 1e7},
+                new double[] {
+                    0.0000000000000000e+000, 3.0225312085986285e-137,
+                    1.2226420014038719e-114, 2.0173074570506826e-114,
+                    8.9206087939511128e-006, 2.8097537346819163e-114,
+                    1.7046276559389835e-114, 4.7000061695431404e-137,
+                    0.0000000000000000e+000
+                }, DoubleTolerances.relative(3e-6))
         );
     }
 }


[commons-statistics] 02/05: Add javadoc link to the Commons RNG project

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit 8b0187add140a680dd742ad8cea3e16d4283afe5
Author: aherbert <ah...@apache.org>
AuthorDate: Tue Nov 22 15:58:41 2022 +0000

    Add javadoc link to the Commons RNG project
    
    The UniformRandomProvider interface is part of distribution package's
    public API.
---
 pom.xml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pom.xml b/pom.xml
index e9246ac..56cd2f0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -310,6 +310,7 @@
           <!-- Java API links configured in commons-parent create redirect warnings on JDK 17
             and fail the build. The links are not required as detectJavaApiLink=true (default). -->
           <links combine.self="override">
+            <link>https://commons.apache.org/proper/commons-rng/commons-rng-client-api/javadocs/api-${statistics.commons.rng.version}/</link>
           </links>
           <failOnWarnings>true</failOnWarnings>
           <!-- Enable MathJax -->
@@ -427,6 +428,7 @@
           <!-- Java API links configured in commons-parent create redirect warnings on JDK 17
             and fail the build. The links are not required as detectJavaApiLink=true (default). -->
           <links combine.self="override">
+            <link>https://commons.apache.org/proper/commons-rng/commons-rng-client-api/javadocs/api-${statistics.commons.rng.version}/</link>
           </links>
           <failOnWarnings>true</failOnWarnings>
           <!-- Enable MathJax -->


[commons-statistics] 01/05: Update supported bounds to use MathJax

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit f099867b6227310a3cc86448e1209dbfb0629dcb
Author: aherbert <ah...@apache.org>
AuthorDate: Tue Nov 22 15:52:45 2022 +0000

    Update supported bounds to use MathJax
    
    This matches the inverse probability documentation.
---
 .../commons/statistics/distribution/ContinuousDistribution.java       | 4 ++--
 .../apache/commons/statistics/distribution/DiscreteDistribution.java  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ContinuousDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ContinuousDistribution.java
index 8e26379..d4646ea 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ContinuousDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ContinuousDistribution.java
@@ -156,7 +156,7 @@ public interface ContinuousDistribution {
      * Gets the lower bound of the support.
      * It must return the same value as
      * {@code inverseCumulativeProbability(0)}, i.e.
-     * {@code inf {x in R | P(X <= x) > 0}}.
+     * \( \inf \{ x \in \mathbb R : P(X \le x) \gt 0 \} \).
      *
      * @return the lower bound of the support.
      */
@@ -166,7 +166,7 @@ public interface ContinuousDistribution {
      * Gets the upper bound of the support.
      * It must return the same
      * value as {@code inverseCumulativeProbability(1)}, i.e.
-     * {@code inf {x in R | P(X <= x) = 1}}.
+     * \( \inf \{ x \in \mathbb R : P(X \le x) = 1 \} \).
      *
      * @return the upper bound of the support.
      */
diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DiscreteDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DiscreteDistribution.java
index 8560bae..5a9f0ed 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DiscreteDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DiscreteDistribution.java
@@ -175,7 +175,7 @@ public interface DiscreteDistribution {
      * Gets the lower bound of the support.
      * This method must return the same value as
      * {@code inverseCumulativeProbability(0)}, i.e.
-     * {@code inf {x in Z | P(X <= x) > 0}}.
+     * \( \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \} \).
      * By convention, {@code Integer.MIN_VALUE} should be substituted
      * for negative infinity.
      *
@@ -187,7 +187,7 @@ public interface DiscreteDistribution {
      * Gets the upper bound of the support.
      * This method must return the same value as
      * {@code inverseCumulativeProbability(1)}, i.e.
-     * {@code inf {x in Z | P(X <= x) = 1}}.
+     * \( \inf \{ x \in \mathbb Z : P(X \le x) = 1 \} \).
      * By convention, {@code Integer.MAX_VALUE} should be substituted
      * for positive infinity.
      *


[commons-statistics] 04/05: Removed TODO comment in discrete distribution inverse probability method

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit 857cff291252cab107ddb4e0562719e003d0f9db
Author: aherbert <ah...@apache.org>
AuthorDate: Tue Nov 22 16:56:12 2022 +0000

    Removed TODO comment in discrete distribution inverse probability method
    
    The task has been captured in STATISTICS-58.
---
 .../commons/statistics/distribution/AbstractDiscreteDistribution.java | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistribution.java
index 81b426c..83843a5 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/AbstractDiscreteDistribution.java
@@ -197,10 +197,6 @@ abstract class AbstractDiscreteDistribution
             }
         }
 
-        // TODO
-        // Improve the simple bisection to use a faster search,
-        // e.g. a BrentSolver.
-
         return solveInverseProbability(fun, lower, upper);
     }
 


[commons-statistics] 03/05: Correct javadoc parameter names

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit 17ad2b90d2736d64a86f84e259c0e14129039004
Author: aherbert <ah...@apache.org>
AuthorDate: Tue Nov 22 16:39:28 2022 +0000

    Correct javadoc parameter names
---
 .../distribution/HypergeometricDistribution.java   | 43 +++++++++++-----------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/HypergeometricDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/HypergeometricDistribution.java
index 0e669b3..69734a1 100644
--- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/HypergeometricDistribution.java
+++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/HypergeometricDistribution.java
@@ -22,7 +22,7 @@ package org.apache.commons.statistics.distribution;
  *
  * <p>The probability mass function of \( X \) is:
  *
- * <p>\[ f(k; n, p) = \frac{\binom{K}{k} \binom{N - K}{n-k}}{\binom{N}{n}} \]
+ * <p>\[ f(k; N, K, n) = \frac{\binom{K}{k} \binom{N - K}{n-k}}{\binom{N}{n}} \]
  *
  * <p>for \( N \in \{0, 1, 2, \dots\} \) the population size,
  * \( K \in \{0, 1, \dots, N\} \) the number of success states,
@@ -111,25 +111,27 @@ public final class HypergeometricDistribution extends AbstractDiscreteDistributi
      * Return the lowest domain value for the given hypergeometric distribution
      * parameters.
      *
-     * @param n Population size.
-     * @param m Number of successes in the population.
-     * @param k Sample size.
+     * @param nn Population size.
+     * @param k Number of successes in the population.
+     * @param n Sample size.
      * @return the lowest domain value of the hypergeometric distribution.
      */
-    private static int getLowerDomain(int n, int m, int k) {
-        return Math.max(0, m - (n - k));
+    private static int getLowerDomain(int nn, int k, int n) {
+        // Avoid overflow given N > n:
+        // n + K - N == K - (N - n)
+        return Math.max(0, k - (nn - n));
     }
 
     /**
      * Return the highest domain value for the given hypergeometric distribution
      * parameters.
      *
-     * @param m Number of successes in the population.
-     * @param k Sample size.
+     * @param k Number of successes in the population.
+     * @param n Sample size.
      * @return the highest domain value of the hypergeometric distribution.
      */
-    private static int getUpperDomain(int m, int k) {
-        return Math.min(k, m);
+    private static int getUpperDomain(int k, int n) {
+        return Math.min(n, k);
     }
 
     /**
@@ -248,10 +250,10 @@ public final class HypergeometricDistribution extends AbstractDiscreteDistributi
     /**
      * {@inheritDoc}
      *
-     * <p>For population size \( N \), number of successes \( m \), and sample
+     * <p>For population size \( N \), number of successes \( K \), and sample
      * size \( n \), the mean is:
      *
-     * <p>\[ n \frac{m}{N} \]
+     * <p>\[ n \frac{K}{N} \]
      */
     @Override
     public double getMean() {
@@ -261,25 +263,24 @@ public final class HypergeometricDistribution extends AbstractDiscreteDistributi
     /**
      * {@inheritDoc}
      *
-     * <p>For population size \( N \), number of successes \( m \), and sample
+     * <p>For population size \( N \), number of successes \( K \), and sample
      * size \( n \), the variance is:
      *
-     * <p>\[ n \frac{m}{N} \frac{N-m}{N} \frac{N-n}{N-1} \]
+     * <p>\[ n \frac{K}{N} \frac{N-K}{N} \frac{N-n}{N-1} \]
      */
     @Override
     public double getVariance() {
         final double N = getPopulationSize();
-        final double m = getNumberOfSuccesses();
+        final double K = getNumberOfSuccesses();
         final double n = getSampleSize();
-        return (n * m * (N - m) * (N - n)) / (N * N * (N - 1));
+        return (n * K * (N - K) * (N - n)) / (N * N * (N - 1));
     }
 
     /**
      * {@inheritDoc}
      *
-     * <p>For population size {@code N}, number of successes {@code m}, and sample
-     * size {@code n}, the lower bound of the support is
-     * {@code max(0, n + m - N)}.
+     * <p>For population size \( N \), number of successes \( K \), and sample
+     * size \( n \), the lower bound of the support is \( \max \{ 0, n + K - N \} \).
      *
      * @return lower bound of the support
      */
@@ -291,8 +292,8 @@ public final class HypergeometricDistribution extends AbstractDiscreteDistributi
     /**
      * {@inheritDoc}
      *
-     * <p>For number of successes {@code m} and sample size {@code n}, the upper
-     * bound of the support is {@code min(m, n)}.
+     * <p>For number of successes \( K \) and sample
+     * size \( n \), the upper bound of the support is \( \min \{ K, n \} \).
      *
      * @return upper bound of the support
      */