You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2014/11/14 00:36:06 UTC

spark git commit: [branch-1.1][SPARK-4355] OnlineSummarizer doesn't merge mean correctly

Repository: spark
Updated Branches:
  refs/heads/branch-1.1 685bdd2b7 -> 4b1c77cbf


[branch-1.1][SPARK-4355] OnlineSummarizer doesn't merge mean correctly

andrewor14 This backports the bug fix in #3220. It would be good if we could get it into 1.1.1, but this is minor.

Author: Xiangrui Meng <me...@databricks.com>

Closes #3251 from mengxr/SPARK-4355-1.1 and squashes the following commits:

33886b6 [Xiangrui Meng] Merge remote-tracking branch 'apache/branch-1.1' into SPARK-4355-1.1
91fe1a3 [Xiangrui Meng] fix OnlineSummarizer.merge when other.mean is zero


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b1c77cb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b1c77cb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b1c77cb

Branch: refs/heads/branch-1.1
Commit: 4b1c77cbf59ccc752bc0d0291df3550cbfbe730c
Parents: 685bdd2
Author: Xiangrui Meng <me...@databricks.com>
Authored: Thu Nov 13 15:36:03 2014 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Thu Nov 13 15:36:03 2014 -0800

----------------------------------------------------------------------
 .../stat/MultivariateOnlineSummarizer.scala     | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4b1c77cb/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
index 7d845c4..f23eb5b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
@@ -104,21 +104,19 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
       val deltaMean: BDV[Double] = currMean - other.currMean
       var i = 0
       while (i < n) {
-        // merge mean together
-        if (other.currMean(i) != 0.0) {
+        if (nnz(i) + other.nnz(i) != 0.0) {
+          // merge mean together
           currMean(i) = (currMean(i) * nnz(i) + other.currMean(i) * other.nnz(i)) /
             (nnz(i) + other.nnz(i))
-        }
-        // merge m2n together
-        if (nnz(i) + other.nnz(i) != 0.0) {
+          // merge m2n together
           currM2n(i) += other.currM2n(i) + deltaMean(i) * deltaMean(i) * nnz(i) * other.nnz(i) /
             (nnz(i) + other.nnz(i))
-        }
-        if (currMax(i) < other.currMax(i)) {
-          currMax(i) = other.currMax(i)
-        }
-        if (currMin(i) > other.currMin(i)) {
-          currMin(i) = other.currMin(i)
+          if (currMax(i) < other.currMax(i)) {
+            currMax(i) = other.currMax(i)
+          }
+          if (currMin(i) > other.currMin(i)) {
+            currMin(i) = other.currMin(i)
+          }
         }
         i += 1
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org