You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@apex.apache.org by vr...@apache.org on 2017/08/27 16:11:11 UTC

[apex-malhar] branch master updated: APEXMALHAR-2489 Change algorithm for running average

This is an automated email from the ASF dual-hosted git repository.

vrozov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apex-malhar.git


The following commit(s) were added to refs/heads/master by this push:
     new 8321395  APEXMALHAR-2489 Change algorithm for running average
8321395 is described below

commit 83213956ad7e208b2487868608c5223ecc840a21
Author: Florian Schmidt <fl...@icloud.com>
AuthorDate: Fri Jul 28 10:46:27 2017 -0700

    APEXMALHAR-2489 Change algorithm for running average
    
    The current algorithm for calculating the running average was subject to
    a potential overflow, because part of the formula required the
    average value (average) to be multiplied with the number of processed
    tuples (count). average * count would for example overflow when e.g.
    average > Double.MAX_VALUE and count >=2
    
    This commit changes the formula used to the one described on
    http://www.heikohoffmann.de/htmlthesis/node134.html, where such a
    multiplication is not necessary anymore. It also adds a unit test which
    checks that there is not overflow occuring anymore
---
 .../main/java/com/datatorrent/lib/math/RunningAverage.java    |  7 ++++---
 .../java/com/datatorrent/lib/math/RunningAverageTest.java     | 11 ++++++++++-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/library/src/main/java/com/datatorrent/lib/math/RunningAverage.java b/library/src/main/java/com/datatorrent/lib/math/RunningAverage.java
index e3b0edf..63389ea 100644
--- a/library/src/main/java/com/datatorrent/lib/math/RunningAverage.java
+++ b/library/src/main/java/com/datatorrent/lib/math/RunningAverage.java
@@ -64,9 +64,10 @@ public class RunningAverage extends BaseOperator
     @Override
     public void process(Number tuple)
     {
-      double sum = (RunningAverage.this.count * average) + tuple.doubleValue();
-      RunningAverage.this.count++;
-      average = sum / RunningAverage.this.count;
+      // Floating mean as explained on http://www.heikohoffmann.de/htmlthesis/node134.html
+      double firstPart = (1.0 / (++RunningAverage.this.count));
+      double secondPart = (tuple.doubleValue() - average);
+      average += (firstPart * secondPart);
     }
   };
 
diff --git a/library/src/test/java/com/datatorrent/lib/math/RunningAverageTest.java b/library/src/test/java/com/datatorrent/lib/math/RunningAverageTest.java
index 52da631..3c5e026 100644
--- a/library/src/test/java/com/datatorrent/lib/math/RunningAverageTest.java
+++ b/library/src/test/java/com/datatorrent/lib/math/RunningAverageTest.java
@@ -29,8 +29,17 @@ import static org.junit.Assert.assertEquals;
  */
 public class RunningAverageTest
 {
-  public RunningAverageTest()
+
+  @Test
+  public void testDoesNotOverflow()
   {
+    RunningAverage instance = new RunningAverage();
+    instance.input.process(Double.MAX_VALUE);
+    assertEquals("first average", Double.MAX_VALUE, instance.average, 0);
+
+    instance.input.process(Double.MAX_VALUE);
+
+    assertEquals("second average", Double.MAX_VALUE, instance.average, 0);
   }
 
   @Test

-- 
To stop receiving notification emails like this one, please contact
['"commits@apex.apache.org" <co...@apex.apache.org>'].