You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by tn...@apache.org on 2015/10/19 21:36:36 UTC

[math] [MATH-837] Support aggregation of any kind of StatisticalSummary in AggregateSummaryStatistics.

Repository: commons-math
Updated Branches:
  refs/heads/MATH_3_X 9f1368715 -> 5511eec3b


[MATH-837] Support aggregation of any kind of StatisticalSummary in AggregateSummaryStatistics.


Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/5511eec3
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/5511eec3
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/5511eec3

Branch: refs/heads/MATH_3_X
Commit: 5511eec3b4dff7e50c6d662b32749b25d18c5b91
Parents: 9f13687
Author: Thomas Neidhart <th...@gmail.com>
Authored: Mon Oct 19 21:36:15 2015 +0200
Committer: Thomas Neidhart <th...@gmail.com>
Committed: Mon Oct 19 21:36:15 2015 +0200

----------------------------------------------------------------------
 src/changes/changes.xml                         |  4 ++
 .../descriptive/AggregateSummaryStatistics.java | 12 +++---
 .../AggregateSummaryStatisticsTest.java         | 40 ++++++++++++++++++--
 3 files changed, 48 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-math/blob/5511eec3/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 1d0de9a..8d466fd 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -51,6 +51,10 @@ If the output is not quite correct, check for invisible trailing spaces!
   </properties>
   <body>
     <release version="3.6" date="XXXX-XX-XX" description="">
+      <action dev="tn" type="add" issue="MATH-837">
+        "AggregateSummaryStatistics" can now aggregate any kind of
+        "StatisticalSummary".
+      </action>
       <action dev="erans" type="fix" issue="MATH-1279">
         Check precondition (class "o.a.c.m.random.EmpiricalDistribution").
       </action>

http://git-wip-us.apache.org/repos/asf/commons-math/blob/5511eec3/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java
index ebc3e33..b1ccce5 100644
--- a/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java
+++ b/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java
@@ -302,20 +302,21 @@ public class AggregateSummaryStatistics implements StatisticalSummary,
      * @param statistics collection of SummaryStatistics to aggregate
      * @return summary statistics for the combined dataset
      */
-    public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
+    public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
         if (statistics == null) {
             return null;
         }
-        Iterator<SummaryStatistics> iterator = statistics.iterator();
+        Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
         if (!iterator.hasNext()) {
             return null;
         }
-        SummaryStatistics current = iterator.next();
+        StatisticalSummary current = iterator.next();
         long n = current.getN();
         double min = current.getMin();
         double sum = current.getSum();
         double max = current.getMax();
-        double m2 = current.getSecondMoment();
+        double var = current.getVariance();
+        double m2 = var * (n - 1d);
         double mean = current.getMean();
         while (iterator.hasNext()) {
             current = iterator.next();
@@ -331,7 +332,8 @@ public class AggregateSummaryStatistics implements StatisticalSummary,
             n += curN;
             final double meanDiff = current.getMean() - mean;
             mean = sum / n;
-            m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
+            final double curM2 = current.getVariance() * (curN - 1d);
+            m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
         }
         final double variance;
         if (n == 0) {

http://git-wip-us.apache.org/repos/asf/commons-math/blob/5511eec3/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java b/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java
index ce48dbe..2db1ee4 100644
--- a/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java
+++ b/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java
@@ -33,7 +33,6 @@ import org.junit.Test;
 
 /**
  * Test cases for {@link AggregateSummaryStatistics}
- *
  */
 public class AggregateSummaryStatisticsTest {
 
@@ -129,7 +128,6 @@ public class AggregateSummaryStatisticsTest {
      * partition and comparing the result of aggregate(...) applied to the collection
      * of per-partition SummaryStatistics with a single SummaryStatistics computed
      * over the full sample.
-     *
      */
     @Test
     public void testAggregate() {
@@ -163,6 +161,42 @@ public class AggregateSummaryStatisticsTest {
         assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
     }
 
+    /**
+     * Similar to {@link #testAggregate()} but operating on
+     * {@link StatisticalSummary} instead.
+     */
+    @Test
+    public void testAggregateStatisticalSummary() {
+
+        // Generate a random sample and random partition
+        double[] totalSample = generateSample();
+        double[][] subSamples = generatePartition(totalSample);
+        int nSamples = subSamples.length;
+
+        // Compute combined stats directly
+        SummaryStatistics totalStats = new SummaryStatistics();
+        for (int i = 0; i < totalSample.length; i++) {
+            totalStats.addValue(totalSample[i]);
+        }
+
+        // Now compute subsample stats individually and aggregate
+        SummaryStatistics[] subSampleStats = new SummaryStatistics[nSamples];
+        for (int i = 0; i < nSamples; i++) {
+            subSampleStats[i] = new SummaryStatistics();
+        }
+        Collection<StatisticalSummary> aggregate = new ArrayList<StatisticalSummary>();
+        for (int i = 0; i < nSamples; i++) {
+            for (int j = 0; j < subSamples[i].length; j++) {
+                subSampleStats[i].addValue(subSamples[i][j]);
+            }
+            aggregate.add(subSampleStats[i].getSummary());
+        }
+
+        // Compare values
+        StatisticalSummary aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
+        assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
+    }
+
 
     @Test
     public void testAggregateDegenerate() {
@@ -266,7 +300,7 @@ public class AggregateSummaryStatisticsTest {
         final double[][] out = new double[5][];
         int cur = 0;          // beginning of current partition segment
         int offset = 0;       // end of current partition segment
-        int sampleCount = 0;  // number of segments defined 
+        int sampleCount = 0;  // number of segments defined
         for (int i = 0; i < 5; i++) {
             if (cur == length || offset == length) {
                 break;