You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ps...@apache.org on 2014/05/05 01:19:43 UTC
svn commit: r1592430 - in /commons/proper/math/trunk/src:
main/java/org/apache/commons/math3/stat/inference/ site/xdoc/userguide/
test/java/org/apache/commons/math3/stat/inference/
Author: psteitz
Date: Sun May 4 23:19:43 2014
New Revision: 1592430
URL: http://svn.apache.org/r1592430
Log:
Added TestUtils convenience methods and updated user guide for Kolmogorov-Smirnov tests. JIRA: MATH-437.
Modified:
commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/TestUtils.java
commons/proper/math/trunk/src/site/xdoc/userguide/stat.xml
commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java
commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/TestUtilsTest.java
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/TestUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/TestUtils.java?rev=1592430&r1=1592429&r2=1592430&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/TestUtils.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/TestUtils.java Sun May 4 23:19:43 2014
@@ -17,8 +17,11 @@
package org.apache.commons.math3.stat.inference;
import java.util.Collection;
+
+import org.apache.commons.math3.distribution.RealDistribution;
import org.apache.commons.math3.exception.ConvergenceException;
import org.apache.commons.math3.exception.DimensionMismatchException;
+import org.apache.commons.math3.exception.InsufficientDataException;
import org.apache.commons.math3.exception.MaxCountExceededException;
import org.apache.commons.math3.exception.NoDataException;
import org.apache.commons.math3.exception.NotPositiveException;
@@ -50,6 +53,9 @@ public class TestUtils {
/** Singleton G-Test instance. */
private static final GTest G_TEST = new GTest();
+ /** Singleton K-S test instance. */
+ private static final KolmogorovSmirnovTest KS_TEST = new KolmogorovSmirnovTest();
+
/**
* Prevent instantiation.
*/
@@ -449,6 +455,94 @@ public class TestUtils {
return G_TEST.gTestDataSetsComparison(observed1, observed2, alpha);
}
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(RealDistribution, double[])
+ * @since 3.3
+ */
+ public static double kolmogorovSmirnovStatistic(RealDistribution dist, double[] data)
+ throws InsufficientDataException, NullArgumentException {
+ return KS_TEST.kolmogorovSmirnovStatistic(dist, data);
+ }
+
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(RealDistribution, double[])
+ * @since 3.3
+ */
+ public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data)
+ throws InsufficientDataException, NullArgumentException {
+ return KS_TEST.kolmogorovSmirnovTest(dist, data);
+ }
+
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(RealDistribution, double[], boolean)
+ * @since 3.3
+ */
+ public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data, boolean strict)
+ throws InsufficientDataException, NullArgumentException {
+ return KS_TEST.kolmogorovSmirnovTest(dist, data, strict);
+ }
+
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(RealDistribution, double[], double)
+ * @since 3.3
+ */
+ public static boolean kolmogorovSmirnovTest(RealDistribution dist, double[] data, double alpha)
+ throws InsufficientDataException, NullArgumentException {
+ return KS_TEST.kolmogorovSmirnovTest(dist, data, alpha);
+ }
+
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])
+ * @since 3.3
+ */
+ public static double kolmogorovSmirnovStatistic(double[] x, double[] y)
+ throws InsufficientDataException, NullArgumentException {
+ return KS_TEST.kolmogorovSmirnovStatistic(x, y);
+ }
+
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(double[], double[])
+ * @since 3.3
+ */
+ public static double kolmogorovSmirnovTest(double[] x, double[] y)
+ throws InsufficientDataException, NullArgumentException {
+ return KS_TEST.kolmogorovSmirnovTest(x, y);
+ }
+
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(double[], double[], boolean)
+ * @since 3.3
+ */
+ public static double kolmogorovSmirnovTest(double[] x, double[] y, boolean strict)
+ throws InsufficientDataException, NullArgumentException {
+ return KS_TEST.kolmogorovSmirnovTest(x, y, strict);
+ }
+
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#exactP(double, int, int, boolean)
+ * @since 3.3
+ */
+ public static double exactP(double d, int m, int n, boolean strict) {
+ return KS_TEST.exactP(d, m, n, strict); // forward sample sizes in declared order, like the sibling wrappers
+ }
+
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#approximateP(double, int, int)
+ * @since 3.3
+ */
+ public static double approximateP(double d, int n, int m) {
+ return KS_TEST.approximateP(d, n, m);
+ }
+
+ /**
+ * @see org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest#monteCarloP(double, int, int, boolean, int)
+ * @since 3.3
+ */
+ public static double monteCarloP(double d, int n, int m, boolean strict, int iterations) {
+ return KS_TEST.monteCarloP(d, n, m, strict, iterations);
+ }
+
+
// CHECKSTYLE: resume JavadocMethodCheck
}
Modified: commons/proper/math/trunk/src/site/xdoc/userguide/stat.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/userguide/stat.xml?rev=1592430&r1=1592429&r2=1592430&view=diff
==============================================================================
--- commons/proper/math/trunk/src/site/xdoc/userguide/stat.xml (original)
+++ commons/proper/math/trunk/src/site/xdoc/userguide/stat.xml Sun May 4 23:19:43 2014
@@ -837,7 +837,8 @@ new KendallsCorrelation().correlation(x,
<a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
p-values</a> associated with <code>t-</code>,
<code>Chi-Square</code>, <code>G</code>, <code>One-Way ANOVA</code>, <code>Mann-Whitney U</code>
- and <code>Wilcoxon signed rank</code> tests. The respective test classes are
+ <code>Wilcoxon signed rank</code>, and <code>Kolmogorov-Smirnov</code> tests.
+ The respective test classes are
<a href="../apidocs/org/apache/commons/math3/stat/inference/TTest.html">
TTest</a>,
<a href="../apidocs/org/apache/commons/math3/stat/inference/ChiSquareTest.html">
@@ -849,16 +850,18 @@ new KendallsCorrelation().correlation(x,
<a href="../apidocs/org/apache/commons/math3/stat/inference/MannWhitneyUTest.html">
MannWhitneyUTest</a>,
<a href="../apidocs/org/apache/commons/math3/stat/inference/WilcoxonSignedRankTest.html">
- WilcoxonSignedRankTest</a> and
+ WilcoxonSignedRankTest</a>,
<a href="../apidocs/org/apache/commons/math3/stat/inference/BinomialTest.html">
- BinomialTest</a>.
+ BinomialTest</a> and
+ <a href="../apidocs/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.html">
+ KolmogorovSmirnovTest</a>.
The <a href="../apidocs/org/apache/commons/math3/stat/inference/TestUtils.html">
TestUtils</a> class provides static methods to get test instances or
to compute test statistics directly. The examples below all use the
static methods in <code>TestUtils</code> to execute tests. To get
test object instances, either use e.g., <code>TestUtils.getTTest()</code>
or use the implementation constructors directly, e.g. <code>new TTest()</code>.
- </p>
+ </p>
<p>
<strong>Implementation Notes</strong>
<ul>
@@ -899,6 +902,24 @@ new KendallsCorrelation().correlation(x,
(resp. critical values) by 2.</li>
<li>Degrees of freedom for G- and chi-square tests are integral values, based on the
number of observed or expected counts (number of observed counts - 1).</li>
+ <li> The Kolmogorov-Smirnov test uses a statistic based on the maximum deviation of
+ the empirical distribution of sample data points from the distribution expected
+ under the null hypothesis. Specifically, what is computed is
+ \(D_n=\sup_x |F_n(x)-F(x)|\), where \(F\) is the expected distribution and
+ \(F_n\) is the empirical distribution of the \(n\) sample data points. Both
+ one-sample tests against a <code>RealDistribution</code> and two-sample tests
+ (comparing two empirical distributions) are supported. For one-sample tests,
+ the distribution of \(D_n\) is estimated using the method in
+ <a href="http://www.jstatsoft.org/v08/i18/">Evaluating Kolmogorov's Distribution</a> by
+ George Marsaglia, Wai Wan Tsang, and Jingbo Wang, with quick decisions in some cases
+ for extreme values using the method described in
+ <a href="http://www.jstatsoft.org/v39/i11/"> Computing the Two-Sided Kolmogorov-Smirnov
+ Distribution</a> by Richard Simard and Pierre L'Ecuyer. In the 2-sample case, estimation
+ by default depends on the number of data points. For small samples, the distribution
+ is computed exactly; for moderately large samples a Monte Carlo procedure is used, and
+ for large samples a numerical approximation of the Kolmogorov distribution is used.
+ Methods to perform each type of p-value estimation are also exposed directly. See
+ the class javadoc for details.</li>
</ul>
</p>
<p>
@@ -1179,6 +1200,46 @@ TestUtils.oneWayAnovaTest(classes, 0.01)
// true means reject null hypothesis
</source>
</dd>
+ <br/>
+ <dt><strong>Kolmogorov-Smirnov tests</strong></dt>
+ <br/>
+ <dd>Given a double[] array <code>sample</code> of values, to evaluate the
+ null hypothesis that the values are drawn from a unit normal distribution
+ <source>
+final NormalDistribution unitNormal = new NormalDistribution(0d, 1d);
+TestUtils.kolmogorovSmirnovTest(unitNormal, sample, false)
+ </source>
+ returns the p-value and
+ <source>
+TestUtils.kolmogorovSmirnovStatistic(unitNormal, sample)
+ </source>
+ returns the D-statistic.
+ <br/>
+ If <code>x</code> and <code>y</code> are double arrays, to evaluate the null hypothesis that
+ <code>x</code> and <code>y</code> are drawn from the same underlying distribution,
+ use
+ <source>
+TestUtils.kolmogorovSmirnovStatistic(x, y)
+ </source>
+ to compute the D-statistic and
+ <source>
+TestUtils.kolmogorovSmirnovTest(x, y)
+ </source>
+ for the p-value associated with the null hypothesis that <code>x</code> and
+ <code>y</code> come from the same distribution. By default, here and above strict
+ inequality is used in the null hypothesis - i.e., we evaluate \(H_0 : D_{n,m} > d \).
+ To make the inequality above non-strict, add <code>false</code> as an actual parameter
+ above. For large samples, this parameter makes no difference.
+ <br/>
+ To force exact computation of the p-value (overriding the selection of estimation
+ method), first compute the d-statistic and then use the <code>exactP</code> method
+ <source>
+final double d = TestUtils.kolmogorovSmirnovStatistic(x, y);
+TestUtils.exactP(d, x.length, y.length, false)
+ </source>
+ assuming that the non-strict form of the null hypothesis is desired. Note, however,
+ that exact computation for anything but very small samples takes a very long time.
+ </dd>
</dl>
</p>
</subsection>
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java?rev=1592430&r1=1592429&r2=1592430&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java Sun May 4 23:19:43 2014
@@ -32,10 +32,10 @@ import org.junit.Test;
*/
public class KolmogorovSmirnovTestTest {
- private static final double TOLERANCE = 10e-10;
+ protected static final double TOLERANCE = 10e-10;
// Random N(0,1) values generated using R rnorm
- private final double[] gaussian = {
+ protected static final double[] gaussian = {
0.26055895, -0.63665233, 1.51221323, 0.61246988, -0.03013003, -1.73025682, -0.51435805, 0.70494168, 0.18242945,
0.94734336, -0.04286604, -0.37931719, -1.07026403, -2.05861425, 0.11201862, 0.71400136, -0.52122185,
-0.02478725, -1.86811649, -1.79907688, 0.15046279, 1.32390193, 1.55889719, 1.83149171, -0.03948003,
@@ -51,7 +51,7 @@ public class KolmogorovSmirnovTestTest {
};
// Random N(0, 1.6) values generated using R rnorm
- private final double[] gaussian2 = {
+ protected static final double[] gaussian2 = {
2.88041498038308, -0.632349445671017, 0.402121295225571, 0.692626364613243, 1.30693446815426,
-0.714176317131286, -0.233169206599583, 1.09113298322107, -1.53149079994305, 1.23259966205809,
1.01389927412503, 0.0143898711497477, -0.512813545447559, 2.79364360835469, 0.662008875538092,
@@ -75,7 +75,7 @@ public class KolmogorovSmirnovTestTest {
};
// Random uniform (0, 1) generated using R runif
- private final double[] uniform = {
+ protected static final double[] uniform = {
0.7930305, 0.6424382, 0.8747699, 0.7156518, 0.1845909, 0.2022326, 0.4877206, 0.8928752, 0.2293062, 0.4222006,
0.1610459, 0.2830535, 0.9946345, 0.7329499, 0.26411126, 0.87958133, 0.29827437, 0.39185988, 0.38351185,
0.36359611, 0.48646472, 0.05577866, 0.56152250, 0.52672013, 0.13171783, 0.95864085, 0.03060207, 0.33514887,
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/TestUtilsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/TestUtilsTest.java?rev=1592430&r1=1592429&r2=1592430&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/TestUtilsTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/TestUtilsTest.java Sun May 4 23:19:43 2014
@@ -19,6 +19,7 @@ package org.apache.commons.math3.stat.in
import java.util.ArrayList;
import java.util.List;
+import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.exception.NotPositiveException;
import org.apache.commons.math3.exception.NotStrictlyPositiveException;
@@ -528,4 +529,30 @@ public class TestUtilsTest {
Assert.assertEquals(FastMath.sqrt(5734.343), TestUtils.rootLogLikelihoodRatio(1000, 1000, 1000, 100000), 0.001);
Assert.assertEquals(FastMath.sqrt(5714.932), TestUtils.rootLogLikelihoodRatio(1000, 1000, 1000, 99000), 0.001);
}
+
+ @Test
+ public void testKSOneSample() throws Exception {
+ final NormalDistribution unitNormal = new NormalDistribution(0d, 1d);
+ final double[] sample = KolmogorovSmirnovTestTest.gaussian;
+ final double tol = KolmogorovSmirnovTestTest.TOLERANCE;
+ Assert.assertEquals(0.3172069207622391, TestUtils.kolmogorovSmirnovTest(unitNormal, sample), tol);
+ Assert.assertEquals(0.0932947561266756, TestUtils.kolmogorovSmirnovStatistic(unitNormal, sample), tol);
+ }
+
+ @Test
+ public void testKSTwoSample() throws Exception {
+ final double tol = KolmogorovSmirnovTestTest.TOLERANCE;
+ final double[] smallSample1 = {
+ 6, 7, 9, 13, 19, 21, 22, 23, 24
+ };
+ final double[] smallSample2 = {
+ 10, 11, 12, 16, 20, 27, 28, 32, 44, 54
+ };
+ Assert
+ .assertEquals(0.105577085453247, TestUtils.kolmogorovSmirnovTest(smallSample1, smallSample2, false), tol);
+ final double d = TestUtils.kolmogorovSmirnovStatistic(smallSample1, smallSample2);
+ Assert.assertEquals(0.5, d, tol);
+ Assert
+ .assertEquals(0.105577085453247, TestUtils.exactP(d, smallSample1.length,smallSample2.length, false), tol);
+ }
}