You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ps...@apache.org on 2009/09/05 19:37:05 UTC
svn commit: r811685 [12/24] - in /commons/proper/math/trunk: ./
src/main/java/org/apache/commons/math/
src/main/java/org/apache/commons/math/analysis/
src/main/java/org/apache/commons/math/analysis/integration/
src/main/java/org/apache/commons/math/ana...
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TTest.java?rev=811685&r1=811684&r2=811685&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TTest.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TTest.java Sat Sep 5 17:36:48 2009
@@ -38,19 +38,19 @@
* Significance levels are always specified as numbers between 0 and 0.5
* (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p>
* <p>
- * Input to tests can be either <code>double[]</code> arrays or
+ * Input to tests can be either <code>double[]</code> arrays or
* {@link StatisticalSummary} instances.</p>
- *
*
- * @version $Revision$ $Date$
+ *
+ * @version $Revision$ $Date$
*/
public interface TTest {
/**
- * Computes a paired, 2-sample t-statistic based on the data in the input
+ * Computes a paired, 2-sample t-statistic based on the data in the input
* arrays. The t-statistic returned is equivalent to what would be returned by
* computing the one-sample t-statistic {@link #t(double, double[])}, with
- * <code>mu = 0</code> and the sample array consisting of the (signed)
- * differences between corresponding entries in <code>sample1</code> and
+ * <code>mu = 0</code> and the sample array consisting of the (signed)
+ * differences between corresponding entries in <code>sample1</code> and
* <code>sample2.</code>
* <p>
* <strong>Preconditions</strong>: <ul>
@@ -68,24 +68,24 @@
public abstract double pairedT(double[] sample1, double[] sample2)
throws IllegalArgumentException, MathException;
/**
- * Returns the <i>observed significance level</i>, or
- * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
* based on the data in the input arrays.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the mean of the paired
- * differences is 0 in favor of the two-sided alternative that the mean paired
- * difference is not equal to 0. For a one-sided test, divide the returned
+ * differences is 0 in favor of the two-sided alternative that the mean paired
+ * difference is not equal to 0. For a one-sided test, divide the returned
* value by 2.</p>
* <p>
* This test is equivalent to a one-sample t-test computed using
* {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
- * array consisting of the signed differences between corresponding elements of
+ * array consisting of the signed differences between corresponding elements of
* <code>sample1</code> and <code>sample2.</code></p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -103,24 +103,24 @@
public abstract double pairedTTest(double[] sample1, double[] sample2)
throws IllegalArgumentException, MathException;
/**
- * Performs a paired t-test evaluating the null hypothesis that the
+ * Performs a paired t-test evaluating the null hypothesis that the
* mean of the paired differences between <code>sample1</code> and
- * <code>sample2</code> is 0 in favor of the two-sided alternative that the
- * mean paired difference is not equal to 0, with significance level
+ * <code>sample2</code> is 0 in favor of the two-sided alternative that the
+ * mean paired difference is not equal to 0, with significance level
* <code>alpha</code>.
* <p>
- * Returns <code>true</code> iff the null hypothesis can be rejected with
- * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
+ * Returns <code>true</code> iff the null hypothesis can be rejected with
+ * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
* <code>alpha * 2</code></p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The input array lengths must be the same and their common length
+ * <li>The input array lengths must be the same and their common length
* must be at least 2.
* </li>
* <li> <code> 0 < alpha < 0.5 </code>
@@ -129,7 +129,7 @@
* @param sample1 array of sample data values
* @param sample2 array of sample data values
* @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
+ * @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
@@ -140,7 +140,7 @@
double alpha)
throws IllegalArgumentException, MathException;
/**
- * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
+ * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
* t statistic </a> given observed values and a comparison constant.
* <p>
* This statistic can be used to perform a one sample t-test for the mean.
@@ -158,7 +158,7 @@
throws IllegalArgumentException;
/**
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
- * t statistic </a> to use in comparing the mean of the dataset described by
+ * t statistic </a> to use in comparing the mean of the dataset described by
* <code>sampleStats</code> to <code>mu</code>.
* <p>
* This statistic can be used to perform a one sample t-test for the mean.
@@ -175,7 +175,7 @@
public abstract double t(double mu, StatisticalSummary sampleStats)
throws IllegalArgumentException;
/**
- * Computes a 2-sample t statistic, under the hypothesis of equal
+ * Computes a 2-sample t statistic, under the hypothesis of equal
* subpopulation variances. To compute a t-statistic without the
* equal variances hypothesis, use {@link #t(double[], double[])}.
* <p>
@@ -186,15 +186,15 @@
* <p>
* <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
* </p><p>
- * where <strong><code>n1</code></strong> is the size of first sample;
- * <strong><code> n2</code></strong> is the size of second sample;
- * <strong><code> m1</code></strong> is the mean of first sample;
+ * where <strong><code>n1</code></strong> is the size of first sample;
+ * <strong><code> n2</code></strong> is the size of second sample;
+ * <strong><code> m1</code></strong> is the mean of first sample;
* <strong><code> m2</code></strong> is the mean of second sample</li>
* </ul>
* and <strong><code>var</code></strong> is the pooled variance estimate:
* </p><p>
* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
- * </p><p>
+ * </p><p>
* with <strong><code>var1</code></strong> the variance of the first sample and
* <strong><code>var2</code></strong> the variance of the second sample.
* </p><p>
@@ -222,11 +222,11 @@
* <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
* </p><p>
* where <strong><code>n1</code></strong> is the size of the first sample
- * <strong><code> n2</code></strong> is the size of the second sample;
- * <strong><code> m1</code></strong> is the mean of the first sample;
+ * <strong><code> n2</code></strong> is the size of the second sample;
+ * <strong><code> m1</code></strong> is the mean of the first sample;
* <strong><code> m2</code></strong> is the mean of the second sample;
* <strong><code> var1</code></strong> is the variance of the first sample;
- * <strong><code> var2</code></strong> is the variance of the second sample;
+ * <strong><code> var2</code></strong> is the variance of the second sample;
* </p><p>
* <strong>Preconditions</strong>: <ul>
* <li>The observed array lengths must both be at least 2.
@@ -242,7 +242,7 @@
/**
* Computes a 2-sample t statistic, comparing the means of the datasets
* described by two {@link StatisticalSummary} instances, without the
- * assumption of equal subpopulation variances. Use
+ * assumption of equal subpopulation variances. Use
* {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
* compute a t-statistic under the equal variances assumption.
* <p>
@@ -253,11 +253,11 @@
* <p>
* <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
* </p><p>
- * where <strong><code>n1</code></strong> is the size of the first sample;
- * <strong><code> n2</code></strong> is the size of the second sample;
- * <strong><code> m1</code></strong> is the mean of the first sample;
+ * where <strong><code>n1</code></strong> is the size of the first sample;
+ * <strong><code> n2</code></strong> is the size of the second sample;
+ * <strong><code> m1</code></strong> is the mean of the first sample;
* <strong><code> m2</code></strong> is the mean of the second sample
- * <strong><code> var1</code></strong> is the variance of the first sample;
+ * <strong><code> var1</code></strong> is the variance of the first sample;
* <strong><code> var2</code></strong> is the variance of the second sample
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -278,7 +278,7 @@
* Computes a 2-sample t statistic, comparing the means of the datasets
* described by two {@link StatisticalSummary} instances, under the
* assumption of equal subpopulation variances. To compute a t-statistic
- * without the equal variances assumption, use
+ * without the equal variances assumption, use
* {@link #t(StatisticalSummary, StatisticalSummary)}.
* <p>
* This statistic can be used to perform a (homoscedastic) two-sample
@@ -288,14 +288,14 @@
* <p>
* <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
* </p><p>
- * where <strong><code>n1</code></strong> is the size of first sample;
- * <strong><code> n2</code></strong> is the size of second sample;
- * <strong><code> m1</code></strong> is the mean of first sample;
+ * where <strong><code>n1</code></strong> is the size of first sample;
+ * <strong><code> n2</code></strong> is the size of second sample;
+ * <strong><code> m1</code></strong> is the mean of first sample;
* <strong><code> m2</code></strong> is the mean of second sample
* and <strong><code>var</code></strong> is the pooled variance estimate:
* </p><p>
* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
- * </p><p>
+ * </p><p>
* with <strong><code>var1</code></strong> the variance of the first sample and
* <strong><code>var2</code></strong> the variance of the second sample.
* </p><p>
@@ -314,19 +314,19 @@
StatisticalSummary sampleStats2)
throws IllegalArgumentException;
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a one-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a one-sample, two-tailed t-test
* comparing the mean of the input array with the constant <code>mu</code>.
* <p>
* The number returned is the smallest significance level
- * at which one can reject the null hypothesis that the mean equals
+ * at which one can reject the null hypothesis that the mean equals
* <code>mu</code> in favor of the two-sided alternative that the mean
- * is different from <code>mu</code>. For a one-sided test, divide the
+ * is different from <code>mu</code>. For a one-sided test, divide the
* returned value by 2.</p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -346,8 +346,8 @@
* two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
* which <code>sample</code> is drawn equals <code>mu</code>.
* <p>
- * Returns <code>true</code> iff the null hypothesis can be
- * rejected with confidence <code>1 - alpha</code>. To
+ * Returns <code>true</code> iff the null hypothesis can be
+ * rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2</code></p>
* <p>
* <strong>Examples:</strong><br><ol>
@@ -355,14 +355,14 @@
* the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
- * at the 99% level, first verify that the measured sample mean is less
- * than <code>mu</code> and then use
+ * at the 99% level, first verify that the measured sample mean is less
+ * than <code>mu</code> and then use
* <br><code>tTest(mu, sample, 0.02) </code>
* </li></ol></p>
* <p>
* <strong>Usage Note:</strong><br>
- * The validity of the test depends on the assumptions of the one-sample
- * parametric t-test procedure, as discussed
+ * The validity of the test depends on the assumptions of the one-sample
+ * parametric t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -379,20 +379,20 @@
public abstract boolean tTest(double mu, double[] sample, double alpha)
throws IllegalArgumentException, MathException;
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a one-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a one-sample, two-tailed t-test
* comparing the mean of the dataset described by <code>sampleStats</code>
* with the constant <code>mu</code>.
* <p>
* The number returned is the smallest significance level
- * at which one can reject the null hypothesis that the mean equals
+ * at which one can reject the null hypothesis that the mean equals
* <code>mu</code> in favor of the two-sided alternative that the mean
- * is different from <code>mu</code>. For a one-sided test, divide the
+ * is different from <code>mu</code>. For a one-sided test, divide the
* returned value by 2.</p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -423,14 +423,14 @@
* the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
- * at the 99% level, first verify that the measured sample mean is less
- * than <code>mu</code> and then use
+ * at the 99% level, first verify that the measured sample mean is less
+ * than <code>mu</code> and then use
* <br><code>tTest(mu, sampleStats, 0.02) </code>
* </li></ol></p>
* <p>
* <strong>Usage Note:</strong><br>
- * The validity of the test depends on the assumptions of the one-sample
- * parametric t-test procedure, as discussed
+ * The validity of the test depends on the assumptions of the one-sample
+ * parametric t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -450,28 +450,28 @@
double alpha)
throws IllegalArgumentException, MathException;
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
* comparing the means of the input arrays.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the two means are
- * equal in favor of the two-sided alternative that they are different.
+ * equal in favor of the two-sided alternative that they are different.
* For a one-sided test, divide the returned value by 2.</p>
* <p>
* The test does not assume that the underlying population variances are
- * equal and it uses approximated degrees of freedom computed from the
+ * equal and it uses approximated degrees of freedom computed from the
* sample data to compute the p-value. The t-statistic used is as defined in
* {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
- * to the degrees of freedom is used,
- * as described
+ * to the degrees of freedom is used,
+ * as described
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
* here.</a> To perform the test under the assumption of equal subpopulation
* variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -488,8 +488,8 @@
public abstract double tTest(double[] sample1, double[] sample2)
throws IllegalArgumentException, MathException;
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
* comparing the means of the input arrays, under the assumption that
* the two samples are drawn from subpopulations with equal variances.
* To perform the test without the equal variances assumption, use
@@ -497,7 +497,7 @@
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the two means are
- * equal in favor of the two-sided alternative that they are different.
+ * equal in favor of the two-sided alternative that they are different.
* For a one-sided test, divide the returned value by 2.</p>
* <p>
* A pooled variance estimate is used to compute the t-statistic. See
@@ -506,7 +506,7 @@
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -525,17 +525,17 @@
double[] sample2)
throws IllegalArgumentException, MathException;
/**
- * Performs a
+ * Performs a
* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
- * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
- * and <code>sample2</code> are drawn from populations with the same mean,
+ * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
+ * and <code>sample2</code> are drawn from populations with the same mean,
* with significance level <code>alpha</code>. This test does not assume
* that the subpopulation variances are equal. To perform the test assuming
- * equal variances, use
+ * equal variances, use
* {@link #homoscedasticTTest(double[], double[], double)}.
* <p>
* Returns <code>true</code> iff the null hypothesis that the means are
- * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2</code></p>
* <p>
* See {@link #t(double[], double[])} for the formula used to compute the
@@ -545,18 +545,18 @@
* <p>
* <strong>Examples:</strong><br><ol>
* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
- * the 95% level, use
+ * the 95% level, use
* <br><code>tTest(sample1, sample2, 0.05). </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>,
* at the 99% level, first verify that the measured mean of <code>sample 1</code>
- * is less than the mean of <code>sample 2</code> and then use
+ * is less than the mean of <code>sample 2</code> and then use
* <br><code>tTest(sample1, sample2, 0.02) </code>
* </li></ol></p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -569,7 +569,7 @@
* @param sample1 array of sample data values
* @param sample2 array of sample data values
* @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
+ * @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
@@ -580,19 +580,19 @@
double alpha)
throws IllegalArgumentException, MathException;
/**
- * Performs a
+ * Performs a
* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
- * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
- * and <code>sample2</code> are drawn from populations with the same mean,
+ * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
+ * and <code>sample2</code> are drawn from populations with the same mean,
* with significance level <code>alpha</code>, assuming that the
- * subpopulation variances are equal. Use
+ * subpopulation variances are equal. Use
* {@link #tTest(double[], double[], double)} to perform the test without
* the assumption of equal variances.
* <p>
* Returns <code>true</code> iff the null hypothesis that the means are
- * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
- * without the assumption of equal subpopulation variances, use
+ * without the assumption of equal subpopulation variances, use
* {@link #tTest(double[], double[], double)}.</p>
* <p>
* A pooled variance estimate is used to compute the t-statistic. See
@@ -604,7 +604,7 @@
* the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
- * at the 99% level, first verify that the measured mean of
+ * at the 99% level, first verify that the measured mean of
* <code>sample 1</code> is less than the mean of <code>sample 2</code>
* and then use
* <br><code>tTest(sample1, sample2, 0.02) </code>
@@ -612,7 +612,7 @@
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -625,7 +625,7 @@
* @param sample1 array of sample data values
* @param sample2 array of sample data values
* @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
+ * @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
@@ -636,25 +636,25 @@
double alpha)
throws IllegalArgumentException, MathException;
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
* comparing the means of the datasets described by two StatisticalSummary
* instances.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the two means are
- * equal in favor of the two-sided alternative that they are different.
+ * equal in favor of the two-sided alternative that they are different.
* For a one-sided test, divide the returned value by 2.</p>
* <p>
* The test does not assume that the underlying population variances are
- * equal and it uses approximated degrees of freedom computed from the
+ * equal and it uses approximated degrees of freedom computed from the
* sample data to compute the p-value. To perform the test assuming
- * equal variances, use
+ * equal variances, use
* {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -674,8 +674,8 @@
StatisticalSummary sampleStats2)
throws IllegalArgumentException, MathException;
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
* comparing the means of the datasets described by two StatisticalSummary
* instances, under the hypothesis of equal subpopulation variances. To
* perform a test without the equal variances assumption, use
@@ -683,7 +683,7 @@
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the two means are
- * equal in favor of the two-sided alternative that they are different.
+ * equal in favor of the two-sided alternative that they are different.
* For a one-sided test, divide the returned value by 2.</p>
* <p>
* See {@link #homoscedasticT(double[], double[])} for the formula used to
@@ -692,7 +692,7 @@
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -711,9 +711,9 @@
StatisticalSummary sampleStats2)
throws IllegalArgumentException, MathException;
/**
- * Performs a
+ * Performs a
* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
- * two-sided t-test</a> evaluating the null hypothesis that
+ * two-sided t-test</a> evaluating the null hypothesis that
* <code>sampleStats1</code> and <code>sampleStats2</code> describe
* datasets drawn from populations with the same mean, with significance
* level <code>alpha</code>. This test does not assume that the
@@ -722,7 +722,7 @@
* {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
* <p>
* Returns <code>true</code> iff the null hypothesis that the means are
- * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2</code></p>
* <p>
* See {@link #t(double[], double[])} for the formula used to compute the
@@ -732,19 +732,19 @@
* <p>
* <strong>Examples:</strong><br><ol>
* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
- * the 95%, use
+ * the 95%, use
* <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
- * at the 99% level, first verify that the measured mean of
+ * at the 99% level, first verify that the measured mean of
* <code>sample 1</code> is less than the mean of <code>sample 2</code>
- * and then use
+ * and then use
* <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
* </li></ol></p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -758,7 +758,7 @@
* @param sampleStats1 StatisticalSummary describing sample data values
* @param sampleStats2 StatisticalSummary describing sample data values
* @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
+ * @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
@@ -768,4 +768,4 @@
StatisticalSummary sampleStats2,
double alpha)
throws IllegalArgumentException, MathException;
-}
\ No newline at end of file
+}
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TTestImpl.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TTestImpl.java?rev=811685&r1=811684&r2=811685&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TTestImpl.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TTestImpl.java Sat Sep 5 17:36:48 2009
@@ -35,14 +35,14 @@
/** Distribution used to compute inference statistics. */
private TDistribution distribution;
-
+
/**
* Default constructor.
*/
public TTestImpl() {
this(new TDistributionImpl(1.0));
}
-
+
/**
* Create a test instance using the given distribution for computing
* inference statistics.
@@ -53,13 +53,13 @@
super();
setDistribution(t);
}
-
+
/**
- * Computes a paired, 2-sample t-statistic based on the data in the input
+ * Computes a paired, 2-sample t-statistic based on the data in the input
* arrays. The t-statistic returned is equivalent to what would be returned by
* computing the one-sample t-statistic {@link #t(double, double[])}, with
- * <code>mu = 0</code> and the sample array consisting of the (signed)
- * differences between corresponding entries in <code>sample1</code> and
+ * <code>mu = 0</code> and the sample array consisting of the (signed)
+ * differences between corresponding entries in <code>sample1</code> and
* <code>sample2.</code>
* <p>
* <strong>Preconditions</strong>: <ul>
@@ -79,30 +79,30 @@
checkSampleData(sample1);
checkSampleData(sample2);
double meanDifference = StatUtils.meanDifference(sample1, sample2);
- return t(meanDifference, 0,
+ return t(meanDifference, 0,
StatUtils.varianceDifference(sample1, sample2, meanDifference),
sample1.length);
}
/**
- * Returns the <i>observed significance level</i>, or
- * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
* based on the data in the input arrays.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the mean of the paired
- * differences is 0 in favor of the two-sided alternative that the mean paired
- * difference is not equal to 0. For a one-sided test, divide the returned
+ * differences is 0 in favor of the two-sided alternative that the mean paired
+ * difference is not equal to 0. For a one-sided test, divide the returned
* value by 2.</p>
* <p>
* This test is equivalent to a one-sample t-test computed using
* {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
- * array consisting of the signed differences between corresponding elements of
+ * array consisting of the signed differences between corresponding elements of
* <code>sample1</code> and <code>sample2.</code></p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -120,30 +120,30 @@
public double pairedTTest(double[] sample1, double[] sample2)
throws IllegalArgumentException, MathException {
double meanDifference = StatUtils.meanDifference(sample1, sample2);
- return tTest(meanDifference, 0,
- StatUtils.varianceDifference(sample1, sample2, meanDifference),
+ return tTest(meanDifference, 0,
+ StatUtils.varianceDifference(sample1, sample2, meanDifference),
sample1.length);
}
/**
- * Performs a paired t-test evaluating the null hypothesis that the
+ * Performs a paired t-test evaluating the null hypothesis that the
* mean of the paired differences between <code>sample1</code> and
- * <code>sample2</code> is 0 in favor of the two-sided alternative that the
- * mean paired difference is not equal to 0, with significance level
+ * <code>sample2</code> is 0 in favor of the two-sided alternative that the
+ * mean paired difference is not equal to 0, with significance level
* <code>alpha</code>.
* <p>
- * Returns <code>true</code> iff the null hypothesis can be rejected with
- * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
+ * Returns <code>true</code> iff the null hypothesis can be rejected with
+ * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
* <code>alpha * 2</code></p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The input array lengths must be the same and their common length
+ * <li>The input array lengths must be the same and their common length
* must be at least 2.
* </li>
* <li> <code> 0 < alpha < 0.5 </code>
@@ -152,7 +152,7 @@
* @param sample1 array of sample data values
* @param sample2 array of sample data values
* @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
+ * @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
@@ -164,7 +164,7 @@
}
/**
- * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
+ * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
* t statistic </a> given observed values and a comparison constant.
* <p>
* This statistic can be used to perform a one sample t-test for the mean.
@@ -187,7 +187,7 @@
/**
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
- * t statistic </a> to use in comparing the mean of the dataset described by
+ * t statistic </a> to use in comparing the mean of the dataset described by
* <code>sampleStats</code> to <code>mu</code>.
* <p>
* This statistic can be used to perform a one sample t-test for the mean.
@@ -209,7 +209,7 @@
}
/**
- * Computes a 2-sample t statistic, under the hypothesis of equal
+ * Computes a 2-sample t statistic, under the hypothesis of equal
* subpopulation variances. To compute a t-statistic without the
* equal variances hypothesis, use {@link #t(double[], double[])}.
* <p>
@@ -220,15 +220,15 @@
* <p>
* <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
* </p><p>
- * where <strong><code>n1</code></strong> is the size of first sample;
- * <strong><code> n2</code></strong> is the size of second sample;
- * <strong><code> m1</code></strong> is the mean of first sample;
+ * where <strong><code>n1</code></strong> is the size of first sample;
+ * <strong><code> n2</code></strong> is the size of second sample;
+ * <strong><code> m1</code></strong> is the mean of first sample;
* <strong><code> m2</code></strong> is the mean of second sample
* and <strong><code>var</code></strong> is the pooled variance estimate:
* </p><p>
* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
- * </p><p>
+ * </p><p>
* with <strong><code>var1</code></strong> the variance of the first sample and
* <strong><code>var2</code></strong> the variance of the second sample.
* </p><p>
@@ -249,7 +249,7 @@
StatUtils.variance(sample1), StatUtils.variance(sample2),
sample1.length, sample2.length);
}
-
+
/**
* Computes a 2-sample t statistic, without the hypothesis of equal
* subpopulation variances. To compute a t-statistic assuming equal
@@ -263,11 +263,11 @@
* <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
* </p><p>
* where <strong><code>n1</code></strong> is the size of the first sample
- * <strong><code> n2</code></strong> is the size of the second sample;
- * <strong><code> m1</code></strong> is the mean of the first sample;
+ * <strong><code> n2</code></strong> is the size of the second sample;
+ * <strong><code> m1</code></strong> is the mean of the first sample;
* <strong><code> m2</code></strong> is the mean of the second sample;
* <strong><code> var1</code></strong> is the variance of the first sample;
- * <strong><code> var2</code></strong> is the variance of the second sample;
+ * <strong><code> var2</code></strong> is the variance of the second sample;
* </p><p>
* <strong>Preconditions</strong>: <ul>
* <li>The observed array lengths must both be at least 2.
@@ -290,7 +290,7 @@
/**
* Computes a 2-sample t statistic, comparing the means of the datasets
* described by two {@link StatisticalSummary} instances, without the
- * assumption of equal subpopulation variances. Use
+ * assumption of equal subpopulation variances. Use
* {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
* compute a t-statistic under the equal variances assumption.
* <p>
@@ -301,11 +301,11 @@
* <p>
* <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
* </p><p>
- * where <strong><code>n1</code></strong> is the size of the first sample;
- * <strong><code> n2</code></strong> is the size of the second sample;
- * <strong><code> m1</code></strong> is the mean of the first sample;
+ * where <strong><code>n1</code></strong> is the size of the first sample;
+ * <strong><code> n2</code></strong> is the size of the second sample;
+ * <strong><code> m1</code></strong> is the mean of the first sample;
* <strong><code> m2</code></strong> is the mean of the second sample
- * <strong><code> var1</code></strong> is the variance of the first sample;
+ * <strong><code> var1</code></strong> is the variance of the first sample;
* <strong><code> var2</code></strong> is the variance of the second sample
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -318,21 +318,21 @@
* @return t statistic
* @throws IllegalArgumentException if the precondition is not met
*/
- public double t(StatisticalSummary sampleStats1,
+ public double t(StatisticalSummary sampleStats1,
StatisticalSummary sampleStats2)
throws IllegalArgumentException {
checkSampleData(sampleStats1);
checkSampleData(sampleStats2);
- return t(sampleStats1.getMean(), sampleStats2.getMean(),
+ return t(sampleStats1.getMean(), sampleStats2.getMean(),
sampleStats1.getVariance(), sampleStats2.getVariance(),
sampleStats1.getN(), sampleStats2.getN());
}
-
+
/**
* Computes a 2-sample t statistic, comparing the means of the datasets
* described by two {@link StatisticalSummary} instances, under the
* assumption of equal subpopulation variances. To compute a t-statistic
- * without the equal variances assumption, use
+ * without the equal variances assumption, use
* {@link #t(StatisticalSummary, StatisticalSummary)}.
* <p>
* This statistic can be used to perform a (homoscedastic) two-sample
@@ -342,14 +342,14 @@
* <p>
* <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
* </p><p>
- * where <strong><code>n1</code></strong> is the size of first sample;
- * <strong><code> n2</code></strong> is the size of second sample;
- * <strong><code> m1</code></strong> is the mean of first sample;
+ * where <strong><code>n1</code></strong> is the size of first sample;
+ * <strong><code> n2</code></strong> is the size of second sample;
+ * <strong><code> m1</code></strong> is the mean of first sample;
* <strong><code> m2</code></strong> is the mean of second sample
* and <strong><code>var</code></strong> is the pooled variance estimate:
* </p><p>
* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
- * <p>
+ * <p>
* with <strong><code>var1</code></strong> the variance of the first sample and
* <strong><code>var2</code></strong> the variance of the second sample.
* </p><p>
@@ -363,30 +363,30 @@
* @return t statistic
* @throws IllegalArgumentException if the precondition is not met
*/
- public double homoscedasticT(StatisticalSummary sampleStats1,
+ public double homoscedasticT(StatisticalSummary sampleStats1,
StatisticalSummary sampleStats2)
throws IllegalArgumentException {
checkSampleData(sampleStats1);
checkSampleData(sampleStats2);
- return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
- sampleStats1.getVariance(), sampleStats2.getVariance(),
+ return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
+ sampleStats1.getVariance(), sampleStats2.getVariance(),
sampleStats1.getN(), sampleStats2.getN());
}
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a one-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a one-sample, two-tailed t-test
* comparing the mean of the input array with the constant <code>mu</code>.
* <p>
* The number returned is the smallest significance level
- * at which one can reject the null hypothesis that the mean equals
+ * at which one can reject the null hypothesis that the mean equals
* <code>mu</code> in favor of the two-sided alternative that the mean
- * is different from <code>mu</code>. For a one-sided test, divide the
+ * is different from <code>mu</code>. For a one-sided test, divide the
* returned value by 2.</p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -411,8 +411,8 @@
* two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
* which <code>sample</code> is drawn equals <code>mu</code>.
* <p>
- * Returns <code>true</code> iff the null hypothesis can be
- * rejected with confidence <code>1 - alpha</code>. To
+ * Returns <code>true</code> iff the null hypothesis can be
+ * rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2</code>
* </p><p>
* <strong>Examples:</strong><br><ol>
@@ -420,14 +420,14 @@
* the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
- * at the 99% level, first verify that the measured sample mean is less
- * than <code>mu</code> and then use
+ * at the 99% level, first verify that the measured sample mean is less
+ * than <code>mu</code> and then use
* <br><code>tTest(mu, sample, 0.02) </code>
* </li></ol></p>
* <p>
* <strong>Usage Note:</strong><br>
- * The validity of the test depends on the assumptions of the one-sample
- * parametric t-test procedure, as discussed
+ * The validity of the test depends on the assumptions of the one-sample
+ * parametric t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -448,20 +448,20 @@
}
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a one-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a one-sample, two-tailed t-test
* comparing the mean of the dataset described by <code>sampleStats</code>
* with the constant <code>mu</code>.
* <p>
* The number returned is the smallest significance level
- * at which one can reject the null hypothesis that the mean equals
+ * at which one can reject the null hypothesis that the mean equals
* <code>mu</code> in favor of the two-sided alternative that the mean
- * is different from <code>mu</code>. For a one-sided test, divide the
+ * is different from <code>mu</code>. For a one-sided test, divide the
* returned value by 2.</p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -497,14 +497,14 @@
* the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
- * at the 99% level, first verify that the measured sample mean is less
- * than <code>mu</code> and then use
+ * at the 99% level, first verify that the measured sample mean is less
+ * than <code>mu</code> and then use
* <br><code>tTest(mu, sampleStats, 0.02) </code>
* </li></ol></p>
* <p>
* <strong>Usage Note:</strong><br>
- * The validity of the test depends on the assumptions of the one-sample
- * parametric t-test procedure, as discussed
+ * The validity of the test depends on the assumptions of the one-sample
+ * parametric t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -526,28 +526,28 @@
}
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
* comparing the means of the input arrays.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the two means are
- * equal in favor of the two-sided alternative that they are different.
+ * equal in favor of the two-sided alternative that they are different.
* For a one-sided test, divide the returned value by 2.</p>
* <p>
* The test does not assume that the underlying population variances are
- * equal and it uses approximated degrees of freedom computed from the
+ * equal and it uses approximated degrees of freedom computed from the
* sample data to compute the p-value. The t-statistic used is as defined in
* {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
- * to the degrees of freedom is used,
- * as described
+ * to the degrees of freedom is used,
+ * as described
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
* here.</a> To perform the test under the assumption of equal subpopulation
* variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -569,10 +569,10 @@
StatUtils.variance(sample1), StatUtils.variance(sample2),
sample1.length, sample2.length);
}
-
+
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
* comparing the means of the input arrays, under the assumption that
* the two samples are drawn from subpopulations with equal variances.
* To perform the test without the equal variances assumption, use
@@ -580,7 +580,7 @@
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the two means are
- * equal in favor of the two-sided alternative that they are different.
+ * equal in favor of the two-sided alternative that they are different.
* For a one-sided test, divide the returned value by 2.</p>
* <p>
* A pooled variance estimate is used to compute the t-statistic. See
@@ -589,7 +589,7 @@
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -607,47 +607,47 @@
throws IllegalArgumentException, MathException {
checkSampleData(sample1);
checkSampleData(sample2);
- return homoscedasticTTest(StatUtils.mean(sample1),
+ return homoscedasticTTest(StatUtils.mean(sample1),
StatUtils.mean(sample2), StatUtils.variance(sample1),
- StatUtils.variance(sample2), sample1.length,
+ StatUtils.variance(sample2), sample1.length,
sample2.length);
}
-
+
/**
- * Performs a
+ * Performs a
* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
- * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
- * and <code>sample2</code> are drawn from populations with the same mean,
+ * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
+ * and <code>sample2</code> are drawn from populations with the same mean,
* with significance level <code>alpha</code>. This test does not assume
* that the subpopulation variances are equal. To perform the test assuming
- * equal variances, use
+ * equal variances, use
* {@link #homoscedasticTTest(double[], double[], double)}.
* <p>
* Returns <code>true</code> iff the null hypothesis that the means are
- * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2</code></p>
* <p>
* See {@link #t(double[], double[])} for the formula used to compute the
* t-statistic. Degrees of freedom are approximated using the
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
* Welch-Satterthwaite approximation.</a></p>
-
+
* <p>
* <strong>Examples:</strong><br><ol>
* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
- * the 95% level, use
+ * the 95% level, use
* <br><code>tTest(sample1, sample2, 0.05). </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> at
* the 99% level, first verify that the measured mean of <code>sample 1</code>
- * is less than the mean of <code>sample 2</code> and then use
+ * is less than the mean of <code>sample 2</code> and then use
* <br><code>tTest(sample1, sample2, 0.02) </code>
* </li></ol></p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -660,7 +660,7 @@
* @param sample1 array of sample data values
* @param sample2 array of sample data values
* @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
+ * @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
@@ -671,21 +671,21 @@
checkSignificanceLevel(alpha);
return (tTest(sample1, sample2) < alpha);
}
-
+
/**
- * Performs a
+ * Performs a
* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
- * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
- * and <code>sample2</code> are drawn from populations with the same mean,
+ * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
+ * and <code>sample2</code> are drawn from populations with the same mean,
* with significance level <code>alpha</code>, assuming that the
- * subpopulation variances are equal. Use
+ * subpopulation variances are equal. Use
* {@link #tTest(double[], double[], double)} to perform the test without
* the assumption of equal variances.
* <p>
* Returns <code>true</code> iff the null hypothesis that the means are
- * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
- * without the assumption of equal subpopulation variances, use
+ * without the assumption of equal subpopulation variances, use
* {@link #tTest(double[], double[], double)}.</p>
* <p>
* A pooled variance estimate is used to compute the t-statistic. See
@@ -697,7 +697,7 @@
* the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
- * at the 99% level, first verify that the measured mean of
+ * at the 99% level, first verify that the measured mean of
* <code>sample 1</code> is less than the mean of <code>sample 2</code>
* and then use
* <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
@@ -705,7 +705,7 @@
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -718,7 +718,7 @@
* @param sample1 array of sample data values
* @param sample2 array of sample data values
* @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
+ * @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
@@ -731,25 +731,25 @@
}
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
* comparing the means of the datasets described by two StatisticalSummary
* instances.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the two means are
- * equal in favor of the two-sided alternative that they are different.
+ * equal in favor of the two-sided alternative that they are different.
* For a one-sided test, divide the returned value by 2.</p>
* <p>
* The test does not assume that the underlying population variances are
- * equal and it uses approximated degrees of freedom computed from the
+ * equal and it uses approximated degrees of freedom computed from the
* sample data to compute the p-value. To perform the test assuming
- * equal variances, use
+ * equal variances, use
* {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -769,13 +769,13 @@
checkSampleData(sampleStats1);
checkSampleData(sampleStats2);
return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
- sampleStats2.getVariance(), sampleStats1.getN(),
+ sampleStats2.getVariance(), sampleStats1.getN(),
sampleStats2.getN());
}
-
+
/**
- * Returns the <i>observed significance level</i>, or
- * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
* comparing the means of the datasets described by two StatisticalSummary
* instances, under the hypothesis of equal subpopulation variances. To
* perform a test without the equal variances assumption, use
@@ -783,7 +783,7 @@
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the two means are
- * equal in favor of the two-sided alternative that they are different.
+ * equal in favor of the two-sided alternative that they are different.
* For a one-sided test, divide the returned value by 2.</p>
* <p>
* See {@link #homoscedasticT(double[], double[])} for the formula used to
@@ -792,7 +792,7 @@
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* </p><p>
* <strong>Preconditions</strong>: <ul>
@@ -806,21 +806,21 @@
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
- public double homoscedasticTTest(StatisticalSummary sampleStats1,
+ public double homoscedasticTTest(StatisticalSummary sampleStats1,
StatisticalSummary sampleStats2)
throws IllegalArgumentException, MathException {
checkSampleData(sampleStats1);
checkSampleData(sampleStats2);
return homoscedasticTTest(sampleStats1.getMean(),
sampleStats2.getMean(), sampleStats1.getVariance(),
- sampleStats2.getVariance(), sampleStats1.getN(),
+ sampleStats2.getVariance(), sampleStats1.getN(),
sampleStats2.getN());
}
/**
- * Performs a
+ * Performs a
* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
- * two-sided t-test</a> evaluating the null hypothesis that
+ * two-sided t-test</a> evaluating the null hypothesis that
* <code>sampleStats1</code> and <code>sampleStats2</code> describe
* datasets drawn from populations with the same mean, with significance
* level <code>alpha</code>. This test does not assume that the
@@ -829,7 +829,7 @@
* {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
* <p>
* Returns <code>true</code> iff the null hypothesis that the means are
- * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2</code></p>
* <p>
* See {@link #t(double[], double[])} for the formula used to compute the
@@ -839,19 +839,19 @@
* <p>
* <strong>Examples:</strong><br><ol>
* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
- * the 95%, use
+ * the 95%, use
* <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
- * at the 99% level, first verify that the measured mean of
+ * at the 99% level, first verify that the measured mean of
* <code>sample 1</code> is less than the mean of <code>sample 2</code>
- * and then use
+ * and then use
* <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
* </li></ol></p>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
- * t-test procedure, as discussed
+ * t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
* here</a></p>
* <p>
@@ -865,7 +865,7 @@
* @param sampleStats1 StatisticalSummary describing sample data values
* @param sampleStats2 StatisticalSummary describing sample data values
* @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
+ * @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
@@ -876,12 +876,12 @@
checkSignificanceLevel(alpha);
return (tTest(sampleStats1, sampleStats2) < alpha);
}
-
- //----------------------------------------------- Protected methods
+
+ //----------------------------------------------- Protected methods
/**
* Computes approximate degrees of freedom for 2-sample t-test.
- *
+ *
* @param v1 first sample variance
* @param v2 second sample variance
* @param n1 first sample n
@@ -896,7 +896,7 @@
/**
* Computes t test statistic for 1-sample t-test.
- *
+ *
* @param m sample mean
* @param mu constant to test against
* @param v sample variance
@@ -906,12 +906,12 @@
protected double t(double m, double mu, double v, double n) {
return (m - mu) / Math.sqrt(v / n);
}
-
+
/**
* Computes t test statistic for 2-sample t-test.
* <p>
* Does not assume that subpopulation variances are equal.</p>
- *
+ *
* @param m1 first sample mean
* @param m2 second sample mean
* @param v1 first sample variance
@@ -924,11 +924,11 @@
double n2) {
return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2));
}
-
+
/**
* Computes t test statistic for 2-sample t-test under the hypothesis
* of equal subpopulation variances.
- *
+ *
* @param m1 first sample mean
* @param m2 second sample mean
* @param v1 first sample variance
@@ -939,13 +939,13 @@
*/
protected double homoscedasticT(double m1, double m2, double v1,
double v2, double n1, double n2) {
- double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
+ double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
return (m1 - m2) / Math.sqrt(pooledVariance * (1d / n1 + 1d / n2));
}
-
+
/**
* Computes p-value for 2-sided, 1-sample t-test.
- *
+ *
* @param m sample mean
* @param mu constant to test against
* @param v sample variance
@@ -965,7 +965,7 @@
* <p>
* Does not assume subpopulation variances are equal. Degrees of freedom
* are estimated from the data.</p>
- *
+ *
* @param m1 first sample mean
* @param m2 second sample mean
* @param v1 first sample variance
@@ -975,7 +975,7 @@
* @return p-value
* @throws MathException if an error occurs computing the p-value
*/
- protected double tTest(double m1, double m2, double v1, double v2,
+ protected double tTest(double m1, double m2, double v1, double v2,
double n1, double n2)
throws MathException {
double t = Math.abs(t(m1, m2, v1, v2, n1, n2));
@@ -984,13 +984,13 @@
distribution.setDegreesOfFreedom(degreesOfFreedom);
return 2.0 * distribution.cumulativeProbability(-t);
}
-
+
/**
* Computes p-value for 2-sided, 2-sample t-test, under the assumption
* of equal subpopulation variances.
* <p>
* The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
- *
+ *
* @param m1 first sample mean
* @param m2 second sample mean
* @param v1 first sample variance
@@ -1008,7 +1008,7 @@
distribution.setDegreesOfFreedom(degreesOfFreedom);
return 2.0 * distribution.cumulativeProbability(-t);
}
-
+
/**
* Modify the distribution used to compute inference statistics.
* @param value the new distribution
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TestUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TestUtils.java?rev=811685&r1=811684&r2=811685&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TestUtils.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/TestUtils.java Sat Sep 5 17:36:48 2009
@@ -25,7 +25,7 @@
* perform inference tests.
*
* @since 1.1
- * @version $Revision$ $Date$
+ * @version $Revision$ $Date$
*/
public class TestUtils {
/**
@@ -34,100 +34,100 @@
protected TestUtils() {
super();
}
-
+
/** Singleton TTest instance using default implementation. */
private static TTest tTest = new TTestImpl();
-
+
/** Singleton ChiSquareTest instance using default implementation. */
- private static ChiSquareTest chiSquareTest =
+ private static ChiSquareTest chiSquareTest =
new ChiSquareTestImpl();
-
+
/** Singleton UnknownDistributionChiSquareTest instance using default implementation. */
- private static UnknownDistributionChiSquareTest unknownDistributionChiSquareTest =
+ private static UnknownDistributionChiSquareTest unknownDistributionChiSquareTest =
new ChiSquareTestImpl();
-
+
/** Singleton OneWayAnova instance using default implementation. */
private static OneWayAnova oneWayAnova =
new OneWayAnovaImpl();
-
+
/**
* Set the (singleton) TTest instance.
- *
+ *
* @param chiSquareTest the new instance to use
* @since 1.2
*/
public static void setChiSquareTest(TTest chiSquareTest) {
TestUtils.tTest = chiSquareTest;
}
-
+
/**
* Return a (singleton) TTest instance. Does not create a new instance.
- *
+ *
* @return a TTest instance
*/
public static TTest getTTest() {
return tTest;
}
-
+
/**
* Set the (singleton) ChiSquareTest instance.
- *
+ *
* @param chiSquareTest the new instance to use
* @since 1.2
*/
public static void setChiSquareTest(ChiSquareTest chiSquareTest) {
TestUtils.chiSquareTest = chiSquareTest;
}
-
+
/**
* Return a (singleton) ChiSquareTest instance. Does not create a new instance.
- *
+ *
* @return a ChiSquareTest instance
*/
public static ChiSquareTest getChiSquareTest() {
return chiSquareTest;
}
-
+
/**
* Set the (singleton) UnknownDistributionChiSquareTest instance.
- *
+ *
* @param unknownDistributionChiSquareTest the new instance to use
* @since 1.2
*/
public static void setUnknownDistributionChiSquareTest(UnknownDistributionChiSquareTest unknownDistributionChiSquareTest) {
TestUtils.unknownDistributionChiSquareTest = unknownDistributionChiSquareTest;
}
-
+
/**
* Return a (singleton) UnknownDistributionChiSquareTest instance. Does not create a new instance.
- *
+ *
* @return a UnknownDistributionChiSquareTest instance
*/
public static UnknownDistributionChiSquareTest getUnknownDistributionChiSquareTest() {
return unknownDistributionChiSquareTest;
}
-
+
/**
* Set the (singleton) OneWayAnova instance
- *
+ *
* @param oneWayAnova the new instance to use
* @since 1.2
*/
public static void setOneWayAnova(OneWayAnova oneWayAnova) {
TestUtils.oneWayAnova = oneWayAnova;
}
-
+
/**
* Return a (singleton) OneWayAnova instance. Does not create a new instance.
- *
+ *
* @return a OneWayAnova instance
* @since 1.2
*/
public static OneWayAnova getOneWayAnova() {
return oneWayAnova;
}
-
-
+
+
// CHECKSTYLE: stop JavadocMethodCheck
/**
@@ -309,7 +309,7 @@
/**
* @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquare(long[][])
*/
- public static double chiSquare(long[][] counts)
+ public static double chiSquare(long[][] counts)
throws IllegalArgumentException {
return chiSquareTest.chiSquare(counts);
}
@@ -378,7 +378,7 @@
throws IllegalArgumentException, MathException {
return unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2, alpha);
}
-
+
/**
* @see org.apache.commons.math.stat.inference.OneWayAnova#anovaFValue(Collection)
*
@@ -388,17 +388,17 @@
throws IllegalArgumentException, MathException {
return oneWayAnova.anovaFValue(categoryData);
}
-
+
/**
* @see org.apache.commons.math.stat.inference.OneWayAnova#anovaPValue(Collection)
- *
+ *
* @since 1.2
*/
public static double oneWayAnovaPValue(Collection<double[]> categoryData)
throws IllegalArgumentException, MathException {
return oneWayAnova.anovaPValue(categoryData);
}
-
+
/**
* @see org.apache.commons.math.stat.inference.OneWayAnova#anovaTest(Collection,double)
*
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/UnknownDistributionChiSquareTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/UnknownDistributionChiSquareTest.java?rev=811685&r1=811684&r2=811685&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/UnknownDistributionChiSquareTest.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/inference/UnknownDistributionChiSquareTest.java Sat Sep 5 17:36:48 2009
@@ -24,12 +24,12 @@
* but provided by one sample. We compare the second sample against the first.</p>
*
* @version $Revision$ $Date$
- * @since 1.2
+ * @since 1.2
*/
public interface UnknownDistributionChiSquareTest extends ChiSquareTest {
-
+
/**
- * <p>Computes a
+ * <p>Computes a
* <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
* Chi-Square two sample test statistic</a> comparing bin frequency counts
* in <code>observed1</code> and <code>observed2</code>. The
@@ -37,7 +37,7 @@
* same. The formula used to compute the test statistic is</p>
* <code>
* ∑[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
- * </code> where
+ * </code> where
* <br/><code>K = √[∑(observed2) / ∑(observed1)]</code>
* </p>
* <p>This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
@@ -68,7 +68,7 @@
* <p>Returns the <i>observed significance level</i>, or <a href=
* "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a Chi-Square two sample test comparing
- * bin frequency counts in <code>observed1</code> and
+ * bin frequency counts in <code>observed1</code> and
* <code>observed2</code>.
* </p>
* <p>The number returned is the smallest significance level at which one
@@ -110,7 +110,7 @@
* significance level <code>alpha</code>. Returns true iff the null
* hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
* </p>
- * <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for
+ * <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for
* details on the formula used to compute the Chi-Square statistic used
* in the test. The degrees of freedom used to perform the test is
* one less than the common length of the input observed count arrays.
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/ranking/NaNStrategy.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/ranking/NaNStrategy.java?rev=811685&r1=811684&r2=811685&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/ranking/NaNStrategy.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/ranking/NaNStrategy.java Sat Sep 5 17:36:48 2009
@@ -34,16 +34,16 @@
* @version $Revision$ $Date$
*/
public enum NaNStrategy {
-
+
/** NaNs are considered minimal in the ordering */
MINIMAL,
-
+
/** NaNs are considered maximal in the ordering */
MAXIMAL,
-
+
/** NaNs are removed before computing ranks */
REMOVED,
-
+
/** NaNs are left in place */
FIXED
}
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/ranking/NaturalRanking.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/ranking/NaturalRanking.java?rev=811685&r1=811684&r2=811685&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/ranking/NaturalRanking.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/ranking/NaturalRanking.java Sat Sep 5 17:36:48 2009
@@ -31,10 +31,10 @@
/**
* <p> Ranking based on the natural ordering on doubles.</p>
* <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
- * are handled using the selected {@link TiesStrategy}.
+ * are handled using the selected {@link TiesStrategy}.
* Configuration settings are supplied in optional constructor arguments.
* Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE},
- * respectively. When using {@link TiesStrategy#RANDOM}, a
+ * respectively. When using {@link TiesStrategy#RANDOM}, a
* {@link RandomGenerator} may be supplied as a constructor argument.</p>
* <p>Examples:
* <table border="1" cellpadding="3">
@@ -63,27 +63,27 @@
* <td>MINIMAL</td>
* <td>MAXIMUM</td>
* <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
- *
+ *
* @since 2.0
* @version $Revision$ $Date$
*/
public class NaturalRanking implements RankingAlgorithm {
-
+
/** NaN strategy - defaults to NaNs maximal */
private final NaNStrategy nanStrategy;
-
+
/** Ties strategy - defaults to ties averaged */
private final TiesStrategy tiesStrategy;
-
+
/** Source of random data - used only when ties strategy is RANDOM */
private final RandomData randomData;
-
+
/** default NaN strategy */
public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL;
-
+
/** default ties strategy */
public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
-
+
/**
* Create a NaturalRanking with default strategies for handling ties and NaNs.
*/
@@ -96,7 +96,7 @@
/**
* Create a NaturalRanking with the given TiesStrategy.
- *
+ *
* @param tiesStrategy the TiesStrategy to use
*/
public NaturalRanking(TiesStrategy tiesStrategy) {
@@ -108,19 +108,19 @@
/**
* Create a NaturalRanking with the given NaNStrategy.
- *
+ *
* @param nanStrategy the NaNStrategy to use
*/
public NaturalRanking(NaNStrategy nanStrategy) {
super();
this.nanStrategy = nanStrategy;
tiesStrategy = DEFAULT_TIES_STRATEGY;
- randomData = null;
+ randomData = null;
}
/**
* Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
- *
+ *
* @param nanStrategy NaNStrategy to use
* @param tiesStrategy TiesStrategy to use
*/
@@ -130,11 +130,11 @@
this.tiesStrategy = tiesStrategy;
randomData = new RandomDataImpl();
}
-
+
/**
* Create a NaturalRanking with TiesStrategy.RANDOM and the given
* RandomGenerator as the source of random data.
- *
+ *
* @param randomGenerator source of random data
*/
public NaturalRanking(RandomGenerator randomGenerator) {
@@ -148,7 +148,7 @@
/**
* Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
* and the given source of random data.
- *
+ *
* @param nanStrategy NaNStrategy to use
* @param randomGenerator source of random data
*/
@@ -159,10 +159,10 @@
this.tiesStrategy = TiesStrategy.RANDOM;
randomData = new RandomDataImpl(randomGenerator);
}
-
+
/**
* Return the NaNStrategy
- *
+ *
* @return the NaNStrategy
*/
public NaNStrategy getNanStrategy() {
@@ -171,7 +171,7 @@
/**
* Return the TiesStrategy
- *
+ *
* @return the TiesStrategy
*/
public TiesStrategy getTiesStrategy() {
@@ -182,18 +182,18 @@
* Rank <code>data</code> using the natural ordering on Doubles, with
* NaN values handled according to <code>nanStrategy</code> and ties
* resolved using <code>tiesStrategy.</code>
- *
+ *
* @param data array to be ranked
* @return array of ranks
*/
public double[] rank(double[] data) {
-
+
// Array recording initial positions of data to be ranked
- IntDoublePair[] ranks = new IntDoublePair[data.length];
+ IntDoublePair[] ranks = new IntDoublePair[data.length];
for (int i = 0; i < data.length; i++) {
ranks[i] = new IntDoublePair(data[i], i);
}
-
+
// Recode, remove or record positions of NaNs
List<Integer> nanPositions = null;
switch (nanStrategy) {
@@ -212,14 +212,14 @@
default: // this should not happen unless NaNStrategy enum is changed
throw MathRuntimeException.createInternalError(null);
}
-
+
// Sort the IntDoublePairs
Arrays.sort(ranks);
-
+
// Walk the sorted array, filling output array using sorted positions,
// resolving ties as we go
double[] out = new double[ranks.length];
- int pos = 1; // position in sorted array
+ int pos = 1; // position in sorted array
out[ranks[0].getPosition()] = pos;
List<Integer> tiesTrace = new ArrayList<Integer>();
tiesTrace.add(ranks[0].getPosition());
@@ -246,11 +246,11 @@
}
return out;
}
-
+
/**
* Returns an array that is a copy of the input array with IntDoublePairs
* having NaN values removed.
- *
+ *
* @param ranks input array
* @return array with NaN-valued entries removed
*/
@@ -279,8 +279,8 @@
}
/**
- * Recodes NaN values to the given value.
- *
+ * Recodes NaN values to the given value.
+ *
* @param ranks array to recode
* @param value the value to replace NaNs with
*/
@@ -292,10 +292,10 @@
}
}
}
-
+
/**
* Checks for presence of NaNs in <code>ranks.</code>
- *
+ *
* @param ranks array to be searched for NaNs
* @return true iff ranks contains one or more NaNs
*/
@@ -307,7 +307,7 @@
}
return false;
}
-
+
/**
* Resolve a sequence of ties, using the configured {@link TiesStrategy}.
* The input <code>ranks</code> array is expected to take the same value
@@ -316,20 +316,20 @@
* tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
* The same array and trace with tiesStrategy AVERAGE will come out
* <5,8,3,6,3,7,1,3>.
- *
- * @param ranks array of ranks
+ *
+ * @param ranks array of ranks
* @param tiesTrace list of indices where <code>ranks</code> is constant
- * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
+ * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
* </code>
*/
private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
-
+
// constant value of ranks over tiesTrace
final double c = ranks[tiesTrace.get(0)];
-
+
// length of sequence of tied ranks
final int length = tiesTrace.size();
-
+
switch (tiesStrategy) {
case AVERAGE: // Replace ranks with average
fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
@@ -344,7 +344,7 @@
Iterator<Integer> iterator = tiesTrace.iterator();
long f = Math.round(c);
while (iterator.hasNext()) {
- ranks[iterator.next()] =
+ ranks[iterator.next()] =
randomData.nextLong(f, f + length - 1);
}
break;
@@ -359,12 +359,12 @@
break;
default: // this should not happen unless TiesStrategy enum is changed
throw MathRuntimeException.createInternalError(null);
- }
+ }
}
-
+
/**
* Sets <code>data[i] = value</code> for each i in <code>tiesTrace</code>.
- *
+ *
* @param data array to modify
* @param tiesTrace list of index values to set
* @param value value to set
@@ -375,10 +375,10 @@
data[iterator.next()] = value;
}
}
-
+
/**
* Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
- *
+ *
* @param ranks array to modify
* @param nanPositions list of index values to set to <code>Double.NaN</code>
*/
@@ -388,14 +388,14 @@
}
Iterator<Integer> iterator = nanPositions.iterator();
while (iterator.hasNext()) {
- ranks[iterator.next().intValue()] = Double.NaN;
+ ranks[iterator.next().intValue()] = Double.NaN;
}
-
+
}
-
+
/**
* Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
- *
+ *
* @param ranks array to search for <code>NaNs</code>
* @return list of indexes i such that <code>ranks[i] = NaN</code>
*/
@@ -406,9 +406,9 @@
out.add(Integer.valueOf(i));
}
}
- return out;
+ return out;
}
-
+
/**
* Represents the position of a double value in an ordering.
* Comparable interface is implemented so Arrays.sort can be used
@@ -436,7 +436,7 @@
/**
* Compare this IntDoublePair to another pair.
* Only the <strong>values</strong> are compared.
- *
+ *
* @param other the other pair to compare this to
* @return result of <code>Double.compare(value, other.value)</code>
*/