You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by tn...@apache.org on 2015/10/19 21:36:36 UTC
[math] [MATH-837] Support aggregation of any kind of StatisticalSummary in AggregateSummaryStatistics.
Repository: commons-math
Updated Branches:
refs/heads/MATH_3_X 9f1368715 -> 5511eec3b
[MATH-837] Support aggregation of any kind of StatisticalSummary in AggregateSummaryStatistics.
Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/5511eec3
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/5511eec3
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/5511eec3
Branch: refs/heads/MATH_3_X
Commit: 5511eec3b4dff7e50c6d662b32749b25d18c5b91
Parents: 9f13687
Author: Thomas Neidhart <th...@gmail.com>
Authored: Mon Oct 19 21:36:15 2015 +0200
Committer: Thomas Neidhart <th...@gmail.com>
Committed: Mon Oct 19 21:36:15 2015 +0200
----------------------------------------------------------------------
src/changes/changes.xml | 4 ++
.../descriptive/AggregateSummaryStatistics.java | 12 +++---
.../AggregateSummaryStatisticsTest.java | 40 ++++++++++++++++++--
3 files changed, 48 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-math/blob/5511eec3/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 1d0de9a..8d466fd 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -51,6 +51,10 @@ If the output is not quite correct, check for invisible trailing spaces!
</properties>
<body>
<release version="3.6" date="XXXX-XX-XX" description="">
+ <action dev="tn" type="add" issue="MATH-837">
+ "AggregateSummaryStatistics" can now aggregate any kind of
+ "StatisticalSummary".
+ </action>
<action dev="erans" type="fix" issue="MATH-1279">
Check precondition (class "o.a.c.m.random.EmpiricalDistribution").
</action>
http://git-wip-us.apache.org/repos/asf/commons-math/blob/5511eec3/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java
index ebc3e33..b1ccce5 100644
--- a/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java
+++ b/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java
@@ -302,20 +302,21 @@ public class AggregateSummaryStatistics implements StatisticalSummary,
* @param statistics collection of SummaryStatistics to aggregate
* @return summary statistics for the combined dataset
*/
- public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
+ public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
if (statistics == null) {
return null;
}
- Iterator<SummaryStatistics> iterator = statistics.iterator();
+ Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
if (!iterator.hasNext()) {
return null;
}
- SummaryStatistics current = iterator.next();
+ StatisticalSummary current = iterator.next();
long n = current.getN();
double min = current.getMin();
double sum = current.getSum();
double max = current.getMax();
- double m2 = current.getSecondMoment();
+ double var = current.getVariance();
+ double m2 = var * (n - 1d);
double mean = current.getMean();
while (iterator.hasNext()) {
current = iterator.next();
@@ -331,7 +332,8 @@ public class AggregateSummaryStatistics implements StatisticalSummary,
n += curN;
final double meanDiff = current.getMean() - mean;
mean = sum / n;
- m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
+ final double curM2 = current.getVariance() * (curN - 1d);
+ m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
}
final double variance;
if (n == 0) {
http://git-wip-us.apache.org/repos/asf/commons-math/blob/5511eec3/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java b/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java
index ce48dbe..2db1ee4 100644
--- a/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java
+++ b/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java
@@ -33,7 +33,6 @@ import org.junit.Test;
/**
* Test cases for {@link AggregateSummaryStatistics}
- *
*/
public class AggregateSummaryStatisticsTest {
@@ -129,7 +128,6 @@ public class AggregateSummaryStatisticsTest {
* partition and comparing the result of aggregate(...) applied to the collection
* of per-partition SummaryStatistics with a single SummaryStatistics computed
* over the full sample.
- *
*/
@Test
public void testAggregate() {
@@ -163,6 +161,42 @@ public class AggregateSummaryStatisticsTest {
assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
}
+ /**
+ * Similar to {@link #testAggregate()} but operating on
+ * {@link StatisticalSummary} instead.
+ */
+ @Test
+ public void testAggregateStatisticalSummary() {
+
+ // Generate a random sample and random partition
+ double[] totalSample = generateSample();
+ double[][] subSamples = generatePartition(totalSample);
+ int nSamples = subSamples.length;
+
+ // Compute combined stats directly
+ SummaryStatistics totalStats = new SummaryStatistics();
+ for (int i = 0; i < totalSample.length; i++) {
+ totalStats.addValue(totalSample[i]);
+ }
+
+ // Now compute subsample stats individually and aggregate
+ SummaryStatistics[] subSampleStats = new SummaryStatistics[nSamples];
+ for (int i = 0; i < nSamples; i++) {
+ subSampleStats[i] = new SummaryStatistics();
+ }
+ Collection<StatisticalSummary> aggregate = new ArrayList<StatisticalSummary>();
+ for (int i = 0; i < nSamples; i++) {
+ for (int j = 0; j < subSamples[i].length; j++) {
+ subSampleStats[i].addValue(subSamples[i][j]);
+ }
+ aggregate.add(subSampleStats[i].getSummary());
+ }
+
+ // Compare values
+ StatisticalSummary aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
+ assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
+ }
+
@Test
public void testAggregateDegenerate() {
@@ -266,7 +300,7 @@ public class AggregateSummaryStatisticsTest {
final double[][] out = new double[5][];
int cur = 0; // beginning of current partition segment
int offset = 0; // end of current partition segment
- int sampleCount = 0; // number of segments defined
+ int sampleCount = 0; // number of segments defined
for (int i = 0; i < 5; i++) {
if (cur == length || offset == length) {
break;