You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ps...@apache.org on 2009/07/09 02:53:32 UTC

svn commit: r792365 - in /commons/proper/math/trunk/src: java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java

Author: psteitz
Date: Thu Jul  9 00:53:32 2009
New Revision: 792365

URL: http://svn.apache.org/viewvc?rev=792365&view=rev
Log:
Cleanup / complete AggregateSummaryStatistics implementation
 * Fully synchronized access to aggregating SummaryStatistics instance
 * Exposed all SummaryStatistics stats
 * Improved test coverage
 * Improved javadoc
JIRA: MATH-224

Modified:
    commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
    commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java

Modified: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java?rev=792365&r1=792364&r2=792365&view=diff
==============================================================================
--- commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java (original)
+++ commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java Thu Jul  9 00:53:32 2009
@@ -23,7 +23,7 @@
 
 /**
  * <p>
- * A StatisticalSummary that aggregates statistics from several data sets or
+ * An aggregator for {@code SummaryStatistics} from several data sets or
  * data set partitions.  In its simplest usage mode, the client creates an
  * instance via the zero-argument constructor, then uses
  * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
@@ -34,7 +34,16 @@
  * Clients with specialized requirements can use alternative constructors to
  * control the statistics implementations and initial values used by the
  * contributing and the internal aggregate {@code SummaryStatistics} objects.
- * </p>
+ * </p><p>
+ * A static {@link #aggregate(Collection)} method is also included that computes
+ * aggregate statistics directly from a Collection of SummaryStatistics instances.
+ * </p><p>
+ * When {@link #createContributingStatistics()} is used to create SummaryStatistics
+ * instances to be aggregated concurrently, the created instances' 
+ * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
+ * instance maintained by this class.  In multithreaded environments, if the functionality
+ * provided by {@link #aggregate(Collection)} is adequate, that method should be used
+ * to avoid unnecessary computation and synchronization delays.</p>
  *
  * @since 2.0
  * @version $Revision:$ $Date:$
@@ -45,7 +54,7 @@
 
  
     /** Serializable version identifier */
-   private static final long serialVersionUID = -8207112444016386906L;
+    private static final long serialVersionUID = -8207112444016386906L;
 
     /**
      * A SummaryStatistics serving as a prototype for creating SummaryStatistics
@@ -54,7 +63,7 @@
     private final SummaryStatistics statisticsPrototype;
     
     /**
-     * The SummaryStatistics in which aggregate statistics are accumulated 
+     * The SummaryStatistics in which aggregate statistics are accumulated.
      */
     private final SummaryStatistics statistics;
     
@@ -124,7 +133,9 @@
      * @see StatisticalSummary#getMax()
      */
     public double getMax() {
-        return statistics.getMax();
+        synchronized (statistics) {
+            return statistics.getMax();
+        }
     }
 
     /**
@@ -133,7 +144,9 @@
      * @see StatisticalSummary#getMean()
      */
     public double getMean() {
-        return statistics.getMean();
+        synchronized (statistics) {
+            return statistics.getMean();
+        }
     }
 
     /**
@@ -143,7 +156,9 @@
      * @see StatisticalSummary#getMin()
      */
     public double getMin() {
-        return statistics.getMin();
+        synchronized (statistics) {
+            return statistics.getMin();
+        }
     }
 
     /**
@@ -152,7 +167,9 @@
      * @see StatisticalSummary#getN()
      */
     public long getN() {
-        return statistics.getN();
+        synchronized (statistics) {
+            return statistics.getN();
+        }
     }
 
     /**
@@ -162,7 +179,9 @@
      * @see StatisticalSummary#getStandardDeviation()
      */
     public double getStandardDeviation() {
-        return statistics.getStandardDeviation();
+        synchronized (statistics) {
+            return statistics.getStandardDeviation();
+        }
     }
 
     /**
@@ -171,7 +190,9 @@
      * @see StatisticalSummary#getSum()
      */
     public double getSum() {
-        return statistics.getSum();
+        synchronized (statistics) {
+            return statistics.getSum();
+        }
     }
 
     /**
@@ -181,7 +202,72 @@
      * @see StatisticalSummary#getVariance()
      */
     public double getVariance() {
-        return statistics.getVariance();
+        synchronized (statistics) {
+            return statistics.getVariance();
+        }
+    }
+    
+    /**
+     * Returns the sum of the logs of all the aggregated data.
+     * 
+     * @return the sum of logs
+     * @see SummaryStatistics#getSumOfLogs()
+     */
+    public double getSumOfLogs() {
+        synchronized (statistics) {
+            return statistics.getSumOfLogs();
+        }
+    }
+    
+    /**
+     * Returns the geometric mean of all the aggregated data.
+     * 
+     * @return the geometric mean
+     * @see SummaryStatistics#getGeometricMean()
+     */
+    public double getGeometricMean() {
+        synchronized (statistics) {
+            return statistics.getGeometricMean();
+        }
+    }
+    
+    /**
+     * Returns the sum of the squares of all the aggregated data.
+     * 
+     * @return The sum of squares
+     * @see SummaryStatistics#getSumsq()
+     */
+    public double getSumsq() {
+        synchronized (statistics) {
+            return statistics.getSumsq();
+        }
+    }
+    
+    /**
+     * Returns a statistic related to the Second Central Moment.  Specifically,
+     * what is returned is the sum of squared deviations from the sample mean
+     * among all of the aggregated data.
+     * 
+     * @return second central moment statistic
+     * @see SummaryStatistics#getSecondMoment()
+     */
+    public double getSecondMoment() {
+        synchronized (statistics) {
+            return statistics.getSecondMoment();
+        }
+    }
+    
+    /**
+     * Return a {@link StatisticalSummaryValues} instance reporting current
+     * aggregate statistics.
+     * 
+     * @return Current values of aggregate statistics
+     */
+    public StatisticalSummary getSummary() {
+        synchronized (statistics) {
+            return new StatisticalSummaryValues(getMean(), getVariance(), getN(), 
+                    getMax(), getMin(), getSum());
+        }
     }
 
     /**

Modified: commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java?rev=792365&r1=792364&r2=792365&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java (original)
+++ commons/proper/math/trunk/src/test/org/apache/commons/math/stat/descriptive/AggregateSummaryStatisticsTest.java Thu Jul  9 00:53:32 2009
@@ -78,6 +78,60 @@
     }
     
     /**
+     * Verify that aggregating over a partition gives the same results
+     * as direct computation.
+     * 
+     *  1) Randomly generate a dataset of 10-100 values
+     *     from [-100, 100]
+     *  2) Divide the dataset into 2-5 partitions
+     *  3) Create an AggregateSummaryStatistics and ContributingStatistics
+     *     for each partition 
+     *  4) Compare results from the AggregateSummaryStatistics with values
+     *     returned by a single SummaryStatistics instance that is provided 
+     *     the full dataset
+     */
+    public void testAggregationConsistency() throws Exception {
+        
+        // Generate a random sample and random partition
+        double[] totalSample = generateSample();
+        double[][] subSamples = generatePartition(totalSample);
+        int nSamples = subSamples.length;
+        
+        // Create aggregator and total stats for comparison
+        AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
+        SummaryStatistics totalStats = new SummaryStatistics();
+        
+        // Create array of component stats
+        SummaryStatistics componentStats[] = new SummaryStatistics[nSamples];
+        
+        for (int i = 0; i < nSamples; i++) {
+            
+            // Make componentStats[i] a contributing statistic to aggregate
+            componentStats[i] = aggregate.createContributingStatistics();
+            
+            // Add values from subsample
+            for (int j = 0; j < subSamples[i].length; j++) {
+                componentStats[i].addValue(subSamples[i][j]);
+            }
+        }
+        
+        // Compute totalStats directly
+        for (int i = 0; i < totalSample.length; i++) {
+            totalStats.addValue(totalSample[i]);
+        }
+        
+        /*
+         * Compare statistics in totalStats with aggregate.
+         * Note that guaranteed success of this comparison depends on the
+         * fact that <aggregate> gets values in exactly the same order
+         * as <totalStats>. 
+         *  
+         */
+        assertEquals(totalStats.getSummary(), aggregate.getSummary());  
+        
+    }
+    
+    /**
      * Test aggregate function by randomly generating a dataset of 10-100 values
      * from [-100, 100], dividing it into 2-5 partitions, computing stats for each
      * partition and comparing the result of aggregate(...) applied to the collection
@@ -176,7 +230,7 @@
     }
     
     /**
-     * Verifies that two StatisticalSummaryValues report the same values up
+     * Verifies that a StatisticalSummary and a StatisticalSummaryValues are equal up
      * to delta, with NaNs, infinities returned in the same spots. For max, min, n, values
      * have to agree exactly, delta is used only for sum, mean, variance, std dev.
      */