You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/05/08 23:41:19 UTC

spark git commit: [SPARK-7390] [SQL] Only merge other CovarianceCounter when its count is greater than zero

Repository: spark
Updated Branches:
  refs/heads/master 5467c34c3 -> 90527f560


[SPARK-7390] [SQL] Only merge other CovarianceCounter when its count is greater than zero

JIRA: https://issues.apache.org/jira/browse/SPARK-7390

Also fix a minor typo in the comment on MkY, which referred to col1 instead of col2.

Author: Liang-Chi Hsieh <vi...@gmail.com>

Closes #5931 from viirya/fix_covariancecounter and squashes the following commits:

352eda6 [Liang-Chi Hsieh] Only merge other CovarianceCounter when its count is greater than zero.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/90527f56
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/90527f56
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/90527f56

Branch: refs/heads/master
Commit: 90527f560462cc2d693176bd961b02767e460e06
Parents: 5467c34
Author: Liang-Chi Hsieh <vi...@gmail.com>
Authored: Fri May 8 14:41:16 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Fri May 8 14:41:16 2015 -0700

----------------------------------------------------------------------
 .../sql/execution/stat/StatFunctions.scala      | 22 +++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/90527f56/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index 386ac96..71b7f6c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -38,7 +38,7 @@ private[sql] object StatFunctions extends Logging {
     var yAvg = 0.0 // the mean of all examples seen so far in col2
     var Ck = 0.0 // the co-moment after k examples
     var MkX = 0.0 // sum of squares of differences from the (current) mean for col1
-    var MkY = 0.0 // sum of squares of differences from the (current) mean for col1
+    var MkY = 0.0 // sum of squares of differences from the (current) mean for col2
     var count = 0L // count of observed examples
     // add an example to the calculation
     def add(x: Double, y: Double): this.type = {
@@ -55,15 +55,17 @@ private[sql] object StatFunctions extends Logging {
     // merge counters from other partitions. Formula can be found at:
     // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
     def merge(other: CovarianceCounter): this.type = {
-      val totalCount = count + other.count
-      val deltaX = xAvg - other.xAvg
-      val deltaY = yAvg - other.yAvg
-      Ck += other.Ck + deltaX * deltaY * count / totalCount * other.count
-      xAvg = (xAvg * count + other.xAvg * other.count) / totalCount
-      yAvg = (yAvg * count + other.yAvg * other.count) / totalCount
-      MkX += other.MkX + deltaX * deltaX * count / totalCount * other.count
-      MkY += other.MkY + deltaY * deltaY * count / totalCount * other.count
-      count = totalCount
+      if (other.count > 0) {
+        val totalCount = count + other.count
+        val deltaX = xAvg - other.xAvg
+        val deltaY = yAvg - other.yAvg
+        Ck += other.Ck + deltaX * deltaY * count / totalCount * other.count
+        xAvg = (xAvg * count + other.xAvg * other.count) / totalCount
+        yAvg = (yAvg * count + other.yAvg * other.count) / totalCount
+        MkX += other.MkX + deltaX * deltaX * count / totalCount * other.count
+        MkY += other.MkY + deltaY * deltaY * count / totalCount * other.count
+        count = totalCount
+      }
       this
     }
     // return the sample covariance for the observed examples


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org