You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2014/11/14 00:36:06 UTC
spark git commit: [branch-1.1][SPARK-4355] OnlineSummarizer doesn't merge mean correctly
Repository: spark
Updated Branches:
refs/heads/branch-1.1 685bdd2b7 -> 4b1c77cbf
[branch-1.1][SPARK-4355] OnlineSummarizer doesn't merge mean correctly
andrewor14: This backports the bug fix in #3220. It would be good if we could get it into 1.1.1, though this is minor.
Author: Xiangrui Meng <me...@databricks.com>
Closes #3251 from mengxr/SPARK-4355-1.1 and squashes the following commits:
33886b6 [Xiangrui Meng] Merge remote-tracking branch 'apache/branch-1.1' into SPARK-4355-1.1
91fe1a3 [Xiangrui Meng] fix OnlineSummarizer.merge when other.mean is zero
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b1c77cb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b1c77cb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b1c77cb
Branch: refs/heads/branch-1.1
Commit: 4b1c77cbf59ccc752bc0d0291df3550cbfbe730c
Parents: 685bdd2
Author: Xiangrui Meng <me...@databricks.com>
Authored: Thu Nov 13 15:36:03 2014 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Thu Nov 13 15:36:03 2014 -0800
----------------------------------------------------------------------
.../stat/MultivariateOnlineSummarizer.scala | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/4b1c77cb/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
index 7d845c4..f23eb5b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
@@ -104,21 +104,19 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
val deltaMean: BDV[Double] = currMean - other.currMean
var i = 0
while (i < n) {
- // merge mean together
- if (other.currMean(i) != 0.0) {
+ if (nnz(i) + other.nnz(i) != 0.0) {
+ // merge mean together
currMean(i) = (currMean(i) * nnz(i) + other.currMean(i) * other.nnz(i)) /
(nnz(i) + other.nnz(i))
- }
- // merge m2n together
- if (nnz(i) + other.nnz(i) != 0.0) {
+ // merge m2n together
currM2n(i) += other.currM2n(i) + deltaMean(i) * deltaMean(i) * nnz(i) * other.nnz(i) /
(nnz(i) + other.nnz(i))
- }
- if (currMax(i) < other.currMax(i)) {
- currMax(i) = other.currMax(i)
- }
- if (currMin(i) > other.currMin(i)) {
- currMin(i) = other.currMin(i)
+ if (currMax(i) < other.currMax(i)) {
+ currMax(i) = other.currMax(i)
+ }
+ if (currMin(i) > other.currMin(i)) {
+ currMin(i) = other.currMin(i)
+ }
}
i += 1
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org