You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@apex.apache.org by vr...@apache.org on 2017/08/27 16:11:11 UTC
[apex-malhar] branch master updated: APEXMALHAR-2489 Change
algorithm for running average
This is an automated email from the ASF dual-hosted git repository.
vrozov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apex-malhar.git
The following commit(s) were added to refs/heads/master by this push:
new 8321395 APEXMALHAR-2489 Change algorithm for running average
8321395 is described below
commit 83213956ad7e208b2487868608c5223ecc840a21
Author: Florian Schmidt <fl...@icloud.com>
AuthorDate: Fri Jul 28 10:46:27 2017 -0700
APEXMALHAR-2489 Change algorithm for running average
The current algorithm for calculating the running average was subject to
a potential overflow, because part of the formula required the
average value (average) to be multiplied with the number of processed
tuples (count). average * count would for example overflow when e.g.
average > Double.MAX_VALUE and count >=2
This commit changes the formula used to the one described on
http://www.heikohoffmann.de/htmlthesis/node134.html, where such a
multiplication is not necessary anymore. It also adds a unit test which
checks that there is not overflow occuring anymore
---
.../main/java/com/datatorrent/lib/math/RunningAverage.java | 7 ++++---
.../java/com/datatorrent/lib/math/RunningAverageTest.java | 11 ++++++++++-
2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/library/src/main/java/com/datatorrent/lib/math/RunningAverage.java b/library/src/main/java/com/datatorrent/lib/math/RunningAverage.java
index e3b0edf..63389ea 100644
--- a/library/src/main/java/com/datatorrent/lib/math/RunningAverage.java
+++ b/library/src/main/java/com/datatorrent/lib/math/RunningAverage.java
@@ -64,9 +64,10 @@ public class RunningAverage extends BaseOperator
@Override
public void process(Number tuple)
{
- double sum = (RunningAverage.this.count * average) + tuple.doubleValue();
- RunningAverage.this.count++;
- average = sum / RunningAverage.this.count;
+ // Floating mean as explained on http://www.heikohoffmann.de/htmlthesis/node134.html
+ double firstPart = (1.0 / (++RunningAverage.this.count));
+ double secondPart = (tuple.doubleValue() - average);
+ average += (firstPart * secondPart);
}
};
diff --git a/library/src/test/java/com/datatorrent/lib/math/RunningAverageTest.java b/library/src/test/java/com/datatorrent/lib/math/RunningAverageTest.java
index 52da631..3c5e026 100644
--- a/library/src/test/java/com/datatorrent/lib/math/RunningAverageTest.java
+++ b/library/src/test/java/com/datatorrent/lib/math/RunningAverageTest.java
@@ -29,8 +29,17 @@ import static org.junit.Assert.assertEquals;
*/
public class RunningAverageTest
{
- public RunningAverageTest()
+
+ @Test
+ public void testDoesNotOverflow()
{
+ RunningAverage instance = new RunningAverage();
+ instance.input.process(Double.MAX_VALUE);
+ assertEquals("first average", Double.MAX_VALUE, instance.average, 0);
+
+ instance.input.process(Double.MAX_VALUE);
+
+ assertEquals("second average", Double.MAX_VALUE, instance.average, 0);
}
@Test
--
To stop receiving notification emails like this one, please contact
['"commits@apex.apache.org" <co...@apex.apache.org>'].