You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2021/12/14 08:50:28 UTC

[commons-statistics] 02/02: STATISTICS-39: Update chisq distribution tests

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit 13f0c30d85d8f900982d207f368d284d0bb666a0
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Mon Dec 13 23:14:43 2021 +0000

    STATISTICS-39: Update chisq distribution tests
    
    The gamma distribution density function was fixed for small shape
    values. This fixes the known failing tests for the chi-squared
    distribution.
    
    This requires a change to BaseContinuousDistributionTest so that the
    density integral test skips extremely steep integrals, i.e. intervals
    where the density at either end point exceeds 1e3.
---
 .../distribution/BaseContinuousDistributionTest.java | 20 +++++++++++++++++---
 .../distribution/ChiSquaredDistributionTest.java     |  2 +-
 .../distribution/test.chisquared.2.properties        | 11 ++---------
 .../distribution/test.chisquared.3.properties        |  2 +-
 .../distribution/test.chisquared.4.properties        |  6 +++---
 5 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseContinuousDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseContinuousDistributionTest.java
index a68b226..c12da52 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseContinuousDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseContinuousDistributionTest.java
@@ -25,6 +25,7 @@ import java.util.stream.Stream;
 import org.apache.commons.math3.analysis.UnivariateFunction;
 import org.apache.commons.math3.analysis.integration.BaseAbstractUnivariateIntegrator;
 import org.apache.commons.math3.analysis.integration.IterativeLegendreGaussIntegrator;
+import org.apache.commons.math3.exception.MaxCountExceededException;
 import org.apache.commons.math3.util.MathArrays;
 import org.apache.commons.rng.simple.RandomSource;
 import org.apache.commons.statistics.distribution.DistributionTestData.ContinuousDistributionTestData;
@@ -410,7 +411,7 @@ abstract class BaseContinuousDistributionTest
      * @return the stream
      */
     Stream<Arguments> testDensityIntegrals() {
-        // Create a tolerance suitable for the same thresholds used by the intergator.
+        // Create a tolerance suitable for the same thresholds used by the integrator.
         final Function<ContinuousDistributionTestData, DoubleTolerance> tolerance =
             d -> createAbsOrRelTolerance(INTEGRATOR_ABS_ACCURACY * 10, INTEGRATOR_REL_ACCURACY * 10);
         return stream(ContinuousDistributionTestData::isDisablePdf,
@@ -879,10 +880,23 @@ abstract class BaseContinuousDistributionTest
         for (int i = 1; i < integrationTestPoints.size(); i++) {
             final double x0 = integrationTestPoints.get(i - 1);
             final double x1 = integrationTestPoints.get(i);
+            // Exclude extremely steep integrals
+            // (e.g. the gamma distribution with shape < 1)
+            if (Math.max(dist.density(x0), dist.density(x1)) > 1e3) {
+                continue;
+            }
+            double integral = 0;
+            try {
+                // Integrals may be slow to converge
+                integral = integrator.integrate(1000000,
+                                                d, x0, x1);
+            } catch (MaxCountExceededException e) {
+                Assertions.fail("Failed density integral: " + x0 + " to " + x1, e);
+            }
             TestUtils.assertEquals(
                 dist.probability(x0, x1),
-                integrator.integrate(1000000, // Integrals may be slow to converge
-                                     d, x0, x1), tolerance,
+                integral,
+                tolerance,
                 () -> "Invalid density integral: " + x0 + " to " + x1);
         }
     }
diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ChiSquaredDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ChiSquaredDistributionTest.java
index c0fef5a..89d2472 100644
--- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ChiSquaredDistributionTest.java
+++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/ChiSquaredDistributionTest.java
@@ -48,7 +48,7 @@ class ChiSquaredDistributionTest extends BaseContinuousDistributionTest {
     @Test
     void testAdditionalDensity() {
         // Values have many digits above the decimal point so use relative tolerance
-        final DoubleTolerance tol = createRelTolerance(1e-9);
+        final DoubleTolerance tol = createRelTolerance(5e-14);
 
         final double[] x = new double[]{-0.1, 1e-6, 0.5, 1, 2, 5};
         // R 2.5:
diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.2.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.2.properties
index 2be3be6..78a1d39 100644
--- a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.2.properties
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.2.properties
@@ -31,7 +31,8 @@ pdf.values = \
   6.3950485031051161e-04 7.9908952673546779e-03 2.4960391325813031e-02 \
   6.9275023117637585e-02 2.7413671677094120e-01 0.0000000000000000e+00
 
-# TODO: Sampling does not work for small degrees of freedom
+# TODO: Sampling test fails
+# The sampler is OK. The inverse CDF to generate the quartiles fails.
 disable.sample = true
 
 # TODO: CDF inverse test fails
@@ -39,11 +40,3 @@ disable.cdf.inverse = true
 
 # TODO: SF inverse test fails
 disable.sf.inverse = true
-
-# TODO: Correct small degrees of freedom PDF
-# The underlying Gamma distribution currently switches to the alternate
-# computation which is inaccurate for expected value 4.27e55. The natural
-# calculation computes the correct answer thus the overflow
-# conditions require revision.
-disable.pdf = true
-disable.logpdf = true
diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.3.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.3.properties
index c8631cf..26d6a85 100644
--- a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.3.properties
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.3.properties
@@ -15,7 +15,7 @@
 
 parameters = 2.0
 # Limited by CDF inverse mapping test
-tolerance.relative = 1e-7
+tolerance.relative = 1.5e-8
 mean = 2.0
 variance = 4.0
 lower = 0
diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.4.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.4.properties
index a4de5dc..81fb55c 100644
--- a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.4.properties
+++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.chisquared.4.properties
@@ -14,12 +14,12 @@
 # limitations under the License.
 
 parameters = 1.0
+# TODO: Limited by the CDF inverse mapping.
+tolerance.relative = 5e-7
+tolerance.absolute = 2e-10
 mean = 1.0
 variance = 2.0
 lower = 0
-# TODO: Tolerance is limited by the PDF value 3.1830955285046593e+02
-# A relative tolerance is better for values with a large range.
-tolerance.absolute = 1e-7
 # Computed with R version 1.8.1 (linux version)
 cdf.points = \
   0.0000000000000000e+00 1.5707971492624904e-06 1.5708785790970206e-04 \