You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2023/09/27 15:26:22 UTC
[commons-statistics] 02/03: Refactor statistics to final classes with a single implementation
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 5421ea5a868f3c6f1c2c31d759ec1c9d32bd1836
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Wed Sep 27 16:04:24 2023 +0100
Refactor statistics to final classes with a single implementation
---
.../statistics/descriptive/FirstMoment.java | 1 +
.../apache/commons/statistics/descriptive/Max.java | 77 ++++-----
.../commons/statistics/descriptive/Mean.java | 115 ++++++-------
.../apache/commons/statistics/descriptive/Min.java | 75 ++++-----
.../apache/commons/statistics/descriptive/Sum.java | 81 ++++-----
.../descriptive/SumOfSquaredDeviations.java | 23 +--
.../commons/statistics/descriptive/Variance.java | 187 +++++++++------------
7 files changed, 234 insertions(+), 325 deletions(-)
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/FirstMoment.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/FirstMoment.java
index 2c354c6..9f3d914 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/FirstMoment.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/FirstMoment.java
@@ -143,6 +143,7 @@ class FirstMoment implements DoubleConsumer {
public void accept(double value) {
// "Updating one-pass algorithm"
// See: Chan et al (1983) Equation 1.3a
+ // m_{i+1} = m_i + (x - m_i) / (i + 1)
// This is modified with scaling to avoid overflow for all finite input.
n++;
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Max.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Max.java
index 83f77c6..2cf653b 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Max.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Max.java
@@ -17,11 +17,14 @@
package org.apache.commons.statistics.descriptive;
/**
- * Returns the maximum of the available values.
+ * Returns the maximum of the available values. Uses {@link Math#max Math.max} as an
+ * underlying function to compute the {@code maximum}.
*
- * <p>The result is {@code NaN} if any of the values is {@code NaN}.
- *
- * <p>The result is {@link Double#NEGATIVE_INFINITY negative infinity} if no values are added.
+ * <ul>
+ * <li>The result is {@link Double#NEGATIVE_INFINITY negative infinity} if no values are added.
+ * <li>The result is {@code NaN} if any of the values is {@code NaN}.
+ * <li>The value {@code -0.0} is considered strictly smaller than {@code 0.0}.
+ * </ul>
*
* <p>This class is designed to work with (though does not require)
* {@linkplain java.util.stream streams}.
@@ -40,28 +43,29 @@ package org.apache.commons.statistics.descriptive;
* safe and efficient parallel execution.
*
* @since 1.1
+ * @see Math#max(double, double)
*/
-public abstract class Max implements DoubleStatistic, DoubleStatisticAccumulator<Max> {
+public final class Max implements DoubleStatistic, DoubleStatisticAccumulator<Max> {
+
+ /** Current maximum. */
+ private double maximum = Double.NEGATIVE_INFINITY;
/**
- * Create a Max instance.
+ * Create an instance.
*/
- Max() {
- //No-op
+ private Max() {
+ // No-op
}
/**
- * Creates a {@code Max} implementation which does not store the input value(s) it consumes.
- *
- * <p>The result is {@code NaN} if any of the values is {@code NaN}.
+ * Creates a {@code Max} instance.
*
- * <p>The result is {@link Double#NEGATIVE_INFINITY negative infinity}
- * if no values have been added.
+ * <p>The initial result is {@link Double#NEGATIVE_INFINITY negative infinity}.
*
- * @return {@code Max} implementation.
+ * @return {@code Max} instance.
*/
public static Max create() {
- return new StorelessMax();
+ return new Max();
}
/**
@@ -76,15 +80,18 @@ public abstract class Max implements DoubleStatistic, DoubleStatisticAccumulator
* @return {@code Max} instance.
*/
public static Max of(double... values) {
- return Statistics.add(new StorelessMax(), values);
+ return Statistics.add(new Max(), values);
}
/**
* Updates the state of the statistic to reflect the addition of {@code value}.
+ *
* @param value Value.
*/
@Override
- public abstract void accept(double value);
+ public void accept(double value) {
+ maximum = Math.max(maximum, value);
+ }
/**
* Gets the maximum of all input values.
@@ -92,36 +99,16 @@ public abstract class Max implements DoubleStatistic, DoubleStatisticAccumulator
* <p>When no values have been added, the result is
* {@link Double#NEGATIVE_INFINITY negative infinity}.
*
- * @return {@code Maximum} of all values seen so far.
+ * @return maximum of all values.
*/
@Override
- public abstract double getAsDouble();
-
- /**
- * {@code Max} implementation that does not store the input value(s) processed so far.
- *
- * <p>Uses JDK's {@link Math#max Math.max} as an underlying function
- * to compute the {@code maximum}.
- */
- private static class StorelessMax extends Max {
-
- /** Current max. */
- private double max = Double.NEGATIVE_INFINITY;
-
- @Override
- public void accept(double value) {
- max = Math.max(max, value);
- }
-
- @Override
- public double getAsDouble() {
- return max;
- }
+ public double getAsDouble() {
+ return maximum;
+ }
- @Override
- public Max combine(Max other) {
- accept(other.getAsDouble());
- return this;
- }
+ @Override
+ public Max combine(Max other) {
+ accept(other.getAsDouble());
+ return this;
}
}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
index 0d0ba90..8f05d38 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
@@ -17,26 +17,31 @@
package org.apache.commons.statistics.descriptive;
/**
- * Computes the arithmetic mean of a set of values. Uses the following recursive
- * updating algorithm:
+ * Computes the arithmetic mean of the available values.
+ *
+ * <ul>
+ * <li>The result is {@code NaN} if no values are added.
+ * <li>The result is {@code NaN} if any of the values is {@code NaN}, or the values include
+ * infinite values of opposite sign.
+ * <li>The result is {@code +/-infinity} if the values include infinite values of the same sign.
+ * <li>The result is finite if all input values are finite.
+ * </ul>
+ *
+ * <p>Uses the following recursive updating algorithm:
* <ol>
- * <li>Initialize {@code m = } the first value</li>
+ * <li>Initialize \( m_1 \) using the first value</li>
* <li>For each additional value, update using <br>
- * {@code m = m + (new value - m) / (number of observations)}</li>
+ * \( m_{i+1} = m_i + (x - m_i) / (i + 1) \)</li>
* </ol>
*
* <p>If {@link #of(double...)} is used to compute the mean of a variable number
* of values, a two-pass, corrected algorithm is used, starting with
* the recursive updating algorithm mentioned above, which protects the mean from overflow,
* and then correcting this by adding the mean deviation of the data values from the
- * arithmetic mean. See, e.g. "Comparison of Several Algorithms for Computing
+ * one-pass mean. See, e.g. "Comparison of Several Algorithms for Computing
* Sample Means and Variances," Robert F. Ling, Journal of the American
* Statistical Association, Vol. 69, No. 348 (Dec., 1974), pp. 859-866.
*
- * <p>Returns {@code NaN} if the dataset is empty. Note that
- * {@code NaN} may also be returned if the input includes {@code NaN} and / or infinite
- * values of opposite sign.
- *
* <p>This class is designed to work with (though does not require)
* {@linkplain java.util.stream streams}.
*
@@ -55,32 +60,45 @@ package org.apache.commons.statistics.descriptive;
*
* @since 1.1
*/
-public abstract class Mean implements DoubleStatistic, DoubleStatisticAccumulator<Mean> {
+public final class Mean implements DoubleStatistic, DoubleStatisticAccumulator<Mean> {
+
+ /**
+ * First moment used to compute the mean.
+ */
+ private final FirstMoment firstMoment;
/**
- * Create a Mean instance.
+ * Create an instance.
*/
- Mean() {
- // No-op
+ private Mean() {
+ this(new FirstMoment());
}
/**
- * Creates a {@code Mean} implementation which does not store the input value(s) it consumes.
+ * Creates an instance with a moment.
*
- * <p>The result is {@code NaN} if any of the values is {@code NaN} or
- * if no values have been added.
+ * @param m1 First moment.
+ */
+ private Mean(FirstMoment m1) {
+ firstMoment = m1;
+ }
+
+ /**
+ * Creates a {@code Mean} instance.
+ *
+ * <p>The initial result is {@code NaN}.
*
- * @return {@code Mean} implementation.
+ * @return {@code Mean} instance.
*/
public static Mean create() {
- return new StorelessMean();
+ return new Mean();
}
/**
* Returns a {@code Mean} instance that has the arithmetic mean of all input values, or {@code NaN}
* if the input array is empty.
*
- * <p>Note: {@code Mean} computed using {@link Mean#accept Mean.accept()} may be different
+ * <p>Note: {@code Mean} computed using {@link #accept(double) accept} may be different
* from this mean.
*
* <p>See {@link Mean} for details on the computing algorithm.
@@ -89,67 +107,34 @@ public abstract class Mean implements DoubleStatistic, DoubleStatisticAccumulato
* @return {@code Mean} instance.
*/
public static Mean of(double... values) {
- return new StorelessMean(FirstMoment.of(values));
+ return new Mean(FirstMoment.of(values));
}
/**
* Updates the state of the statistic to reflect the addition of {@code value}.
+ *
* @param value Value.
*/
@Override
- public abstract void accept(double value);
+ public void accept(double value) {
+ firstMoment.accept(value);
+ }
/**
* Gets the mean of all input values.
*
* <p>When no values have been added, the result is {@code NaN}.
*
- * @return {@code Mean} of all values seen so far.
+ * @return mean of all values.
*/
@Override
- public abstract double getAsDouble();
-
- /**
- * {@code Mean} implementation that does not store the input value(s) processed so far.
- */
- private static class StorelessMean extends Mean {
-
- /**
- * External Moment used to compute the mean.
- */
- private final FirstMoment firstMoment;
-
- /**
- * Creates an instance with a moment.
- *
- * @param m1 First moment.
- */
- StorelessMean(FirstMoment m1) {
- firstMoment = m1;
- }
-
- /**
- * Create an instance.
- */
- StorelessMean() {
- this(new FirstMoment());
- }
-
- @Override
- public void accept(double value) {
- firstMoment.accept(value);
- }
-
- @Override
- public double getAsDouble() {
- return firstMoment.getFirstMoment();
- }
+ public double getAsDouble() {
+ return firstMoment.getFirstMoment();
+ }
- @Override
- public Mean combine(Mean other) {
- final StorelessMean that = (StorelessMean) other;
- firstMoment.combine(that.firstMoment);
- return this;
- }
+ @Override
+ public Mean combine(Mean other) {
+ firstMoment.combine(other.firstMoment);
+ return this;
}
}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Min.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Min.java
index a729d5a..352256b 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Min.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Min.java
@@ -17,11 +17,14 @@
package org.apache.commons.statistics.descriptive;
/**
- * Returns the minimum of the available values.
+ * Returns the minimum of the available values. Uses {@link Math#min Math.min} as an
+ * underlying function to compute the {@code minimum}.
*
- * <p>The result is {@code NaN} if any of the values is {@code NaN}.
- *
- * <p>The result is {@link Double#POSITIVE_INFINITY positive infinity} if no values are added.
+ * <ul>
+ * <li>The result is {@link Double#POSITIVE_INFINITY positive infinity} if no values are added.
+ * <li>The result is {@code NaN} if any of the values is {@code NaN}.
+ * <li>The value {@code -0.0} is considered strictly smaller than {@code 0.0}.
+ * </ul>
*
* <p>This class is designed to work with (though does not require)
* {@linkplain java.util.stream streams}.
@@ -40,28 +43,29 @@ package org.apache.commons.statistics.descriptive;
* safe and efficient parallel execution.
*
* @since 1.1
+ * @see Math#min(double, double)
*/
-public abstract class Min implements DoubleStatistic, DoubleStatisticAccumulator<Min> {
+public final class Min implements DoubleStatistic, DoubleStatisticAccumulator<Min> {
+
+ /** Current minimum. */
+ private double minimum = Double.POSITIVE_INFINITY;
/**
- * Create a Min instance.
+ * Create an instance.
*/
- Min() {
+ private Min() {
// No-op
}
/**
- * Creates a {@code Min} implementation which does not store the input value(s) it consumes.
- *
- * <p>The result is {@code NaN} if any of the values is {@code NaN}.
+ * Creates a {@code Min} instance.
*
- * <p>The result is {@link Double#POSITIVE_INFINITY positive infinity}
- * if no values have been added.
+ * <p>The initial result is {@link Double#POSITIVE_INFINITY positive infinity}.
*
- * @return {@code Min} implementation.
+ * @return {@code Min} instance.
*/
public static Min create() {
- return new StorelessMin();
+ return new Min();
}
/**
@@ -76,15 +80,18 @@ public abstract class Min implements DoubleStatistic, DoubleStatisticAccumulator
* @return {@code Min} instance.
*/
public static Min of(double... values) {
- return Statistics.add(new StorelessMin(), values);
+ return Statistics.add(new Min(), values);
}
/**
* Updates the state of the statistic to reflect the addition of {@code value}.
+ *
* @param value Value.
*/
@Override
- public abstract void accept(double value);
+ public void accept(double value) {
+ minimum = Math.min(minimum, value);
+ }
/**
* Gets the minimum of all input values.
@@ -92,36 +99,16 @@ public abstract class Min implements DoubleStatistic, DoubleStatisticAccumulator
* <p>When no values have been added, the result is
* {@link Double#POSITIVE_INFINITY positive infinity}.
*
- * @return {@code Minimum} of all values seen so far.
+ * @return minimum of all values.
*/
@Override
- public abstract double getAsDouble();
-
- /**
- * {@code Min} implementation that does not store the input value(s) processed so far.
- *
- * <p>Uses JDK's {@link Math#min Math.min} as an underlying function
- * to compute the {@code minimum}.
- */
- private static class StorelessMin extends Min {
-
- /** Current min. */
- private double min = Double.POSITIVE_INFINITY;
-
- @Override
- public void accept(double value) {
- min = Math.min(min, value);
- }
-
- @Override
- public double getAsDouble() {
- return min;
- }
+ public double getAsDouble() {
+ return minimum;
+ }
- @Override
- public Min combine(Min other) {
- accept(other.getAsDouble());
- return this;
- }
+ @Override
+ public Min combine(Min other) {
+ accept(other.getAsDouble());
+ return this;
}
}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Sum.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Sum.java
index a1f2ad1..8cf4659 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Sum.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Sum.java
@@ -19,14 +19,15 @@ package org.apache.commons.statistics.descriptive;
/**
* Returns the sum of the available values.
*
- * <p>The result is {@code NaN} if any of the values is {@code NaN}.
- *
- * <p>The result is zero if no values are added.
+ * <ul>
+ * <li>The result is zero if no values are added.
+ * <li>The result is {@code NaN} if any of the values is {@code NaN}.
+ * </ul>
*
* <p>This class is designed to work with (though does not require)
* {@linkplain java.util.stream streams}.
*
- * <p><strong>This implementation is not thread safe.</strong>
+ * <p><strong>This implementation is not thread safe.</strong>
* If multiple threads access an instance of this class concurrently,
* and at least one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
* {@link DoubleStatisticAccumulator#combine(DoubleStatistic) combine} method, it must be synchronized externally.
@@ -35,35 +36,35 @@ package org.apache.commons.statistics.descriptive;
* and {@link DoubleStatisticAccumulator#combine(DoubleStatistic) combine}
* as {@code accumulator} and {@code combiner} functions of
* {@link java.util.stream.Collector Collector} on a parallel stream,
- * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
+ * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
* provides the necessary partitioning, isolation, and merging of results for
* safe and efficient parallel execution.
*
* @since 1.1
+ * @see org.apache.commons.numbers.core.Sum
*/
-public abstract class Sum implements DoubleStatistic, DoubleStatisticAccumulator<Sum> {
+public final class Sum implements DoubleStatistic, DoubleStatisticAccumulator<Sum> {
+
+ /** {@link org.apache.commons.numbers.core.Sum Sum} used to compute the sum. */
+ private final org.apache.commons.numbers.core.Sum delegate =
+ org.apache.commons.numbers.core.Sum.create();
/**
- * Create a Sum instance.
+ * Create an instance.
*/
- Sum() {
- //No-op
+ private Sum() {
+ // No-op
}
/**
- * Creates a {@code Sum} implementation which does not store the input value(s) it consumes.
- *
- * <p>The result is {@code NaN} if any of the values is {@code NaN} or the sum
- * at any point is a {@code NaN}.
- *
- * <p>The result is zero if no values have been added.
+ * Creates a {@code Sum} instance.
*
- * <p>Uses the {@link org.apache.commons.numbers.core.Sum Commons Numbers Sum} implementation.
+ * <p>The initial result is zero.
*
- * @return {@code Sum} implementation.
+ * @return {@code Sum} instance.
*/
public static Sum create() {
- return new WrappedSum();
+ return new Sum();
}
/**
@@ -78,52 +79,34 @@ public abstract class Sum implements DoubleStatistic, DoubleStatisticAccumulator
* @return {@code Sum} instance.
*/
public static Sum of(double... values) {
- return Statistics.add(new WrappedSum(), values);
+ return Statistics.add(new Sum(), values);
}
/**
* Updates the state of the statistic to reflect the addition of {@code value}.
+ *
* @param value Value.
*/
@Override
- public abstract void accept(double value);
+ public void accept(double value) {
+ delegate.accept(value);
+ }
/**
* Gets the sum of all input values.
*
* <p>When no values have been added, the result is zero.
*
- * @return {@code Sum} of all values seen so far.
+ * @return sum of all values.
*/
@Override
- public abstract double getAsDouble();
-
- /**
- * {@code Sum} implementation that does not store the input value(s) processed so far.
- *
- * <p>Delegates to the {@link org.apache.commons.numbers.core.Sum} implementation.
- */
- private static class WrappedSum extends Sum {
-
- /** Create an instance of {@link org.apache.commons.numbers.core.Sum Sum}. */
- private final org.apache.commons.numbers.core.Sum delegate =
- org.apache.commons.numbers.core.Sum.create();
-
- @Override
- public void accept(double value) {
- delegate.add(value);
- }
-
- @Override
- public double getAsDouble() {
- return delegate.getAsDouble();
- }
+ public double getAsDouble() {
+ return delegate.getAsDouble();
+ }
- @Override
- public Sum combine(Sum other) {
- final WrappedSum that = (WrappedSum) other;
- delegate.add(that.delegate);
- return this;
- }
+ @Override
+ public Sum combine(Sum other) {
+ delegate.add(other.delegate);
+ return this;
}
}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquaredDeviations.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquaredDeviations.java
index a06f645..0d41db4 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquaredDeviations.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquaredDeviations.java
@@ -20,19 +20,15 @@ package org.apache.commons.statistics.descriptive;
* Computes the sum of squared deviations from the sample mean. This
* statistic is related to the second moment.
*
- * <p>
- * The following recursive updating formula is used:
- * <p>
- * Let <ul>
- * <li> dev = (current obs - previous mean) </li>
- * <li> n = number of observations (including current obs) </li>
+ * <p>The following recursive updating formula is used:
+ * <p>Let
+ * <ul>
+ * <li> dev = (current obs - previous mean) </li>
+ * <li> n = number of observations (including current obs) </li>
* </ul>
- * Then
- * <p>
- * new value = old value + dev^2 * (n - 1) / n.
- * <p>
- *
- * Returns the sum of squared deviations of all values seen so far.
+ * <p>Then
+ * <p>new value = old value + dev^2 * (n - 1) / n
+ * <p>Returns the sum of squared deviations of all values seen so far.
*
* <p><strong>Note that this implementation is not synchronized.</strong> If
* multiple threads access an instance of this class concurrently, and at least
@@ -90,8 +86,7 @@ class SumOfSquaredDeviations extends FirstMoment {
* </ul>
*
* <p>Note: {@code SumOfSquaredDeviations} computed using
- * {@link SumOfSquaredDeviations#accept SumOfSquaredDeviations.accept()} may be different
- * from this instance.
+ * {@link #accept accept} may be different from this instance.
*
* <p>See {@link SumOfSquaredDeviations} for details on the computing algorithm.
*
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
index 21b10fe..6828ac0 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
@@ -17,41 +17,49 @@
package org.apache.commons.statistics.descriptive;
/**
- * Computes the variance of a set of values. By default, the
- * "sample variance" is computed. The definitional formula for sample
- * variance is:
- * <p>
- * sum((x_i - mean)^2) / (n - 1)
+ * Computes the variance of the available values. By default, the
+ * "sample variance" is computed.
+ *
+ * <ul>
+ * <li>The result is {@code NaN} if no values are added.
+ * <li>The result is {@code NaN} if any of the values is {@code NaN} or infinite.
+ * <li>The result is {@code NaN} if the sum of the squared deviations from the mean is infinite.
+ * <li>The result is zero if there is one finite value in the data set.
+ * </ul>
+ *
+ * <p>The definitional formula for sample variance is:
+ *
+ * <p>\[ \frac{1}{n - 1} \sum_i^n{(x_i - \mu)^2} \]
+ *
+ * <p>where \( \mu \) is the sample mean.
+ *
* <p>This formula does not have good numerical properties, so this
- * implementation does not use it to compute the statistic.
+ * implementation does not use it to compute the statistic.
+ *
* <ul>
- * <li> The {@link #accept(double)} method computes the variance using
- * updating formulae based on West's algorithm, as described in
- * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
- * J. G. Lewis 1979, <i>Communications of the ACM</i>,
- * vol. 22 no. 9, pp. 526-531.</a></li>
+ * <li>The {@link #accept(double)} method computes the variance using
+ * updating formulae based on West's algorithm, as described in
+ * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
+ * J. G. Lewis 1979, <i>Communications of the ACM</i>,
+ * vol. 22 no. 9, pp. 526-531.</a>
*
- * <li> The {@link #of(double...)} method leverages the fact that it has the
- * full array of values in memory to execute a two-pass algorithm.
- * Specifically, this method uses the "corrected two-pass algorithm" from
- * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
- * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul>
+ * <li>The {@link #of(double...)} method leverages the fact that it has the
+ * full array of values in memory to execute a two-pass algorithm.
+ * Specifically, this method uses the "corrected two-pass algorithm" from
+ * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
+ * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.
+ * </ul>
*
- * Note that adding values using {@code accept} and then executing {@code getAsDouble} will
+ * <p>Note that adding values using {@link #accept(double) accept} and then executing
+ * {@link #getAsDouble() getAsDouble} will
* sometimes give a different, less accurate, result than executing
- * {@code of} with the full array of values. The former approach
+ * {@link #of(double...) of} with the full array of values. The former approach
* should only be used when the full array of values is not available.
*
- * <p>
- * Returns {@code NaN} if no data values have been added and
- * returns {@code 0} if there is just one finite value in the data set.
- * Note that {@code NaN} may also be returned if the input includes
- * {@code NaN} and / or infinite values.
- *
* <p>This class is designed to work with (though does not require)
* {@linkplain java.util.stream streams}.
*
- * <p><strong>Note that this implementation is not synchronized.</strong> If
+ * <p><strong>Note that this instance is not synchronized.</strong> If
* multiple threads access an instance of this class concurrently, and at least
* one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
* {@link DoubleStatisticAccumulator#combine(DoubleStatistic) combine} method, it must be synchronized externally.
@@ -60,48 +68,52 @@ package org.apache.commons.statistics.descriptive;
* and {@link DoubleStatisticAccumulator#combine(DoubleStatistic) combine}
* as {@code accumulator} and {@code combiner} functions of
* {@link java.util.stream.Collector Collector} on a parallel stream,
- * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
+ * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
* provides the necessary partitioning, isolation, and merging of results for
* safe and efficient parallel execution.
*
* @since 1.1
*/
-public abstract class Variance implements DoubleStatistic, DoubleStatisticAccumulator<Variance> {
+public final class Variance implements DoubleStatistic, DoubleStatisticAccumulator<Variance> {
/**
- * Create a Variance instance.
+ * An instance of {@link SumOfSquaredDeviations}, which is used to
+ * compute the variance.
*/
- Variance() {
- // No-op
+ private final SumOfSquaredDeviations ss;
+
+ /**
+ * Create an instance.
+ */
+ private Variance() {
+ this(new SumOfSquaredDeviations());
+ }
+
+ /**
+ * Creates an instance with the sum of squared deviations from the mean.
+ *
+ * @param ss Sum of squared deviations.
+ */
+ private Variance(SumOfSquaredDeviations ss) {
+ this.ss = ss;
}
/**
- * Creates a {@code Variance} implementation which does not store the input value(s) it consumes.
+ * Creates a {@code Variance} instance.
*
- * <p>The result is {@code NaN} if:
- * <ul>
- * <li>no values have been added,</li>
- * <li>any of the values is {@code NaN}, or</li>
- * <li>an infinite value of either sign is encountered</li>
- * </ul>
+ * <p>The initial result is {@code NaN}.
*
- * @return {@code Variance} implementation.
+ * @return {@code Variance} instance.
*/
public static Variance create() {
- return new StorelessSampleVariance();
+ return new Variance();
}
/**
* Returns a {@code Variance} instance that has the variance of all input values, or {@code NaN}
- * if:
- * <ul>
- * <li>the input array is empty,</li>
- * <li>any of the values is {@code NaN},</li>
- * <li>an infinite value of either sign is encountered, or</li>
- * <li>the sum of the squared deviations from the mean is infinite</li>
- * </ul>
+ * if the input array is empty.
*
- * <p>Note: {@code Variance} computed using {@link Variance#accept Variance.accept()} may be different
+ * <p>Note: {@code Variance} computed using {@link #accept(double) accept} may be different
* from this variance.
*
* <p>See {@link Variance} for details on the computing algorithm.
@@ -110,82 +122,41 @@ public abstract class Variance implements DoubleStatistic, DoubleStatisticAccumu
* @return {@code Variance} instance.
*/
public static Variance of(double... values) {
- return new StorelessSampleVariance(SumOfSquaredDeviations.of(values));
+ return new Variance(SumOfSquaredDeviations.of(values));
}
/**
* Updates the state of the statistic to reflect the addition of {@code value}.
+ *
* @param value Value.
*/
@Override
- public abstract void accept(double value);
+ public void accept(double value) {
+ ss.accept(value);
+ }
/**
* Gets the variance of all input values.
*
- * <p>The result is {@code NaN} if :
- * <ul>
- * <li>the input array is empty,</li>
- * <li>any of the values is {@code NaN}, or</li>
- * <li>an infinite value of either sign is encountered</li>
- * </ul>
- *
- * <p>The result is {@code 0} if there is just one finite value in the data set.
+ * <p>When no values have been added, the result is {@code NaN}.
*
- * @return {@code Variance} of all values seen so far.
+ * @return variance of all values.
*/
@Override
- public abstract double getAsDouble();
-
- /**
- * {@code Variance} implementation that does not store the input value(s) processed so far.
- */
- private static class StorelessSampleVariance extends Variance {
-
- /**
- * An instance of {@link SumOfSquaredDeviations}, which is used to
- * compute the variance.
- */
- private final SumOfSquaredDeviations ss;
-
- /**
- * Creates an instance with the sum of squared deviations from the mean.
- *
- * @param ss Sum of squared deviations.
- */
- StorelessSampleVariance(SumOfSquaredDeviations ss) {
- this.ss = ss;
- }
-
- /**
- * Create an instance.
- */
- StorelessSampleVariance() {
- this(new SumOfSquaredDeviations());
- }
-
- @Override
- public void accept(double value) {
- ss.accept(value);
- }
-
- @Override
- public double getAsDouble() {
- final double sumOfSquaredDev = ss.getSumOfSquaredDeviations();
- final long n = ss.n;
- if (n == 0) {
- return Double.NaN;
- } else if (n == 1 && Double.isFinite(sumOfSquaredDev)) {
- return 0;
- }
- return sumOfSquaredDev / (n - 1.0);
+ public double getAsDouble() {
+ final double sumOfSquaredDev = ss.getSumOfSquaredDeviations();
+ final long n = ss.n;
+ if (n == 0) {
+ return Double.NaN;
+ } else if (n == 1 && Double.isFinite(sumOfSquaredDev)) {
+ return 0;
}
+ return sumOfSquaredDev / (n - 1.0);
+ }
- @Override
- public Variance combine(Variance other) {
- final StorelessSampleVariance that = (StorelessSampleVariance) other;
- ss.combine(that.ss);
- return this;
- }
+ @Override
+ public Variance combine(Variance other) {
+ ss.combine(other.ss);
+ return this;
}
}