You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ps...@apache.org on 2009/07/09 02:53:32 UTC
svn commit: r792365 - in /commons/proper/math/trunk/src:
java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java
Author: psteitz
Date: Thu Jul 9 00:53:32 2009
New Revision: 792365
URL: http://svn.apache.org/viewvc?rev=792365&view=rev
Log:
Cleanup / complete AggregateSummaryStatistics implementation
* Fully synchronized access to aggregating SummaryStatistics instance
* Exposed all SummaryStatistics stats
* Improved test coverage
* Improved javadoc
JIRA: MATH-224
Modified:
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java
Modified: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java?rev=792365&r1=792364&r2=792365&view=diff
==============================================================================
--- commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java (original)
+++ commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java Thu Jul 9 00:53:32 2009
@@ -23,7 +23,7 @@
/**
* <p>
- * A StatisticalSummary that aggregates statistics from several data sets or
+ * An aggregator for {@code SummaryStatistics} from several data sets or
* data set partitions. In its simplest usage mode, the client creates an
* instance via the zero-argument constructor, then uses
* {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
@@ -34,7 +34,16 @@
* Clients with specialized requirements can use alternative constructors to
* control the statistics implementations and initial values used by the
* contributing and the internal aggregate {@code SummaryStatistics} objects.
- * </p>
+ * </p><p>
+ * A static {@link #aggregate(Collection)} method is also included that computes
+ * aggregate statistics directly from a Collection of SummaryStatistics instances.
+ * </p><p>
+ * When {@link #createContributingStatistics()} is used to create SummaryStatistics
+ * instances to be aggregated concurrently, the created instances'
+ * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
+ * instance maintained by this class. In multithreaded environments, if the functionality
+ * provided by {@link #aggregate(Collection)} is adequate, that method should be used
+ * to avoid unnecessary computation and synchronization delays.</p>
*
* @since 2.0
* @version $Revision:$ $Date:$
@@ -45,7 +54,7 @@
/** Serializable version identifier */
- private static final long serialVersionUID = -8207112444016386906L;
+ private static final long serialVersionUID = -8207112444016386906L;
/**
* A SummaryStatistics serving as a prototype for creating SummaryStatistics
@@ -54,7 +63,7 @@
private final SummaryStatistics statisticsPrototype;
/**
- * The SummaryStatistics in which aggregate statistics are accumulated
+ * The SummaryStatistics in which aggregate statistics are accumulated.
*/
private final SummaryStatistics statistics;
@@ -124,7 +133,9 @@
* @see StatisticalSummary#getMax()
*/
public double getMax() {
- return statistics.getMax();
+ synchronized (statistics) {
+ return statistics.getMax();
+ }
}
/**
@@ -133,7 +144,9 @@
* @see StatisticalSummary#getMean()
*/
public double getMean() {
- return statistics.getMean();
+ synchronized (statistics) {
+ return statistics.getMean();
+ }
}
/**
@@ -143,7 +156,9 @@
* @see StatisticalSummary#getMin()
*/
public double getMin() {
- return statistics.getMin();
+ synchronized (statistics) {
+ return statistics.getMin();
+ }
}
/**
@@ -152,7 +167,9 @@
* @see StatisticalSummary#getN()
*/
public long getN() {
- return statistics.getN();
+ synchronized (statistics) {
+ return statistics.getN();
+ }
}
/**
@@ -162,7 +179,9 @@
* @see StatisticalSummary#getStandardDeviation()
*/
public double getStandardDeviation() {
- return statistics.getStandardDeviation();
+ synchronized (statistics) {
+ return statistics.getStandardDeviation();
+ }
}
/**
@@ -171,7 +190,9 @@
* @see StatisticalSummary#getSum()
*/
public double getSum() {
- return statistics.getSum();
+ synchronized (statistics) {
+ return statistics.getSum();
+ }
}
/**
@@ -181,7 +202,72 @@
* @see StatisticalSummary#getVariance()
*/
public double getVariance() {
- return statistics.getVariance();
+ synchronized (statistics) {
+ return statistics.getVariance();
+ }
+ }
+
+ /**
+ * Returns the sum of the logs of all the aggregated data.
+ *
+ * @return the sum of logs
+ * @see SummaryStatistics#getSumOfLogs()
+ */
+ public double getSumOfLogs() {
+ synchronized (statistics) {
+ return statistics.getSumOfLogs();
+ }
+ }
+
+ /**
+ * Returns the geometric mean of all the aggregated data.
+ *
+ * @return the geometric mean
+ * @see SummaryStatistics#getGeometricMean()
+ */
+ public double getGeometricMean() {
+ synchronized (statistics) {
+ return statistics.getGeometricMean();
+ }
+ }
+
+ /**
+ * Returns the sum of the squares of all the aggregated data.
+ *
+ * @return The sum of squares
+ * @see SummaryStatistics#getSumsq()
+ */
+ public double getSumsq() {
+ synchronized (statistics) {
+ return statistics.getSumsq();
+ }
+ }
+
+ /**
+ * Returns a statistic related to the Second Central Moment. Specifically,
+ * what is returned is the sum of squared deviations from the sample mean
+ * among all of the aggregated data.
+ *
+ * @return second central moment statistic
+ * @see SummaryStatistics#getSecondMoment()
+ */
+ public double getSecondMoment() {
+ synchronized (statistics) {
+ return statistics.getSecondMoment();
+ }
+ }
+
+ /**
+ * Return a {@link StatisticalSummaryValues} instance reporting current
+ * aggregate statistics.
+ *
+ * @return Current values of aggregate statistics
+ */
+ public StatisticalSummary getSummary() {
+ synchronized (statistics) {
+ return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
+ getMax(), getMin(), getSum());
+ }
}
/**
Modified: commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java?rev=792365&r1=792364&r2=792365&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java (original)
+++ commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java Thu Jul 9 00:53:32 2009
@@ -78,6 +78,60 @@
}
/**
+ * Verify that aggregating over a partition gives the same results
+ * as direct computation.
+ *
+ * 1) Randomly generate a dataset of 10-100 values
+ * from [-100, 100]
+ * 2) Divide the dataset into 2-5 partitions
+ * 3) Create an AggregateSummaryStatistics and ContributingStatistics
+ * for each partition
+ * 4) Compare results from the AggregateSummaryStatistics with values
+ * returned by a single SummaryStatistics instance that is provided
+ * the full dataset
+ */
+ public void testAggregationConsistency() throws Exception {
+
+ // Generate a random sample and random partition
+ double[] totalSample = generateSample();
+ double[][] subSamples = generatePartition(totalSample);
+ int nSamples = subSamples.length;
+
+ // Create aggregator and total stats for comparison
+ AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
+ SummaryStatistics totalStats = new SummaryStatistics();
+
+ // Create array of component stats
+ SummaryStatistics componentStats[] = new SummaryStatistics[nSamples];
+
+ for (int i = 0; i < nSamples; i++) {
+
+ // Make componentStats[i] a contributing statistic to aggregate
+ componentStats[i] = aggregate.createContributingStatistics();
+
+ // Add values from subsample
+ for (int j = 0; j < subSamples[i].length; j++) {
+ componentStats[i].addValue(subSamples[i][j]);
+ }
+ }
+
+ // Compute totalStats directly
+ for (int i = 0; i < totalSample.length; i++) {
+ totalStats.addValue(totalSample[i]);
+ }
+
+ /*
+ * Compare statistics in totalStats with aggregate.
+ * Note that guaranteed success of this comparison depends on the
+ * fact that <aggregate> gets values in exactly the same order
+ * as <totalStats>.
+ *
+ */
+ assertEquals(totalStats.getSummary(), aggregate.getSummary());
+
+ }
+
+ /**
* Test aggregate function by randomly generating a dataset of 10-100 values
* from [-100, 100], dividing it into 2-5 partitions, computing stats for each
* partition and comparing the result of aggregate(...) applied to the collection
@@ -176,7 +230,7 @@
}
/**
- * Verifies that two StatisticalSummaryValues report the same values up
+ * Verifies that a StatisticalSummary and a StatisticalSummaryValues are equal up
* to delta, with NaNs, infinities returned in the same spots. For max, min, n, values
* have to agree exactly, delta is used only for sum, mean, variance, std dev.
*/