You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/10/09 07:22:23 UTC

spark git commit: [SPARK-10875] [MLLIB] Computed covariance matrix should be symmetric

Repository: spark
Updated Branches:
  refs/heads/master 5410747a8 -> 5994cfe81


[SPARK-10875] [MLLIB] Computed covariance matrix should be symmetric

Compute upper triangular values of the covariance matrix, then copy to lower triangular values.

Author: Nick Pritchard <ni...@falkonry.com>

Closes #8940 from pnpritchard/SPARK-10875.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5994cfe8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5994cfe8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5994cfe8

Branch: refs/heads/master
Commit: 5994cfe81271a39294aa29fd47aa94c99aa56743
Parents: 5410747
Author: Nick Pritchard <ni...@falkonry.com>
Authored: Thu Oct 8 22:22:20 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Thu Oct 8 22:22:20 2015 -0700

----------------------------------------------------------------------
 .../mllib/linalg/distributed/RowMatrix.scala      |  6 ++++--
 .../mllib/linalg/distributed/RowMatrixSuite.scala | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5994cfe8/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 7c7d900..b8a7adc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -357,9 +357,11 @@ class RowMatrix @Since("1.0.0") (
     var alpha = 0.0
     while (i < n) {
       alpha = m / m1 * mean(i)
-      j = 0
+      j = i
       while (j < n) {
-        G(i, j) = G(i, j) / m1 - alpha * mean(j)
+        val Gij = G(i, j) / m1 - alpha * mean(j)
+        G(i, j) = Gij
+        G(j, i) = Gij
         j += 1
       }
       i += 1

http://git-wip-us.apache.org/repos/asf/spark/blob/5994cfe8/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
index 283ffec..4abb98f 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
@@ -24,6 +24,7 @@ import breeze.linalg.{DenseVector => BDV, DenseMatrix => BDM, norm => brzNorm, s
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.linalg.{Matrices, Vectors, Vector}
+import org.apache.spark.mllib.random.RandomRDDs
 import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext}
 
 class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
@@ -255,6 +256,23 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
       assert(closeToZero(abs(expected.r) - abs(rOnly.R.toBreeze.asInstanceOf[BDM[Double]])))
     }
   }
+
+  test("compute covariance") {
+    for (mat <- Seq(denseMat, sparseMat)) {
+      val result = mat.computeCovariance()
+      val expected = breeze.linalg.cov(mat.toBreeze())
+      assert(closeToZero(abs(expected) - abs(result.toBreeze.asInstanceOf[BDM[Double]])))
+    }
+  }
+
+  test("covariance matrix is symmetric (SPARK-10875)") {
+    val rdd = RandomRDDs.normalVectorRDD(sc, 100, 10, 0, 0)
+    val matrix = new RowMatrix(rdd)
+    val cov = matrix.computeCovariance()
+    for (i <- 0 until cov.numRows; j <- 0 until i) {
+      assert(cov(i, j) === cov(j, i))
+    }
+  }
 }
 
 class RowMatrixClusterSuite extends SparkFunSuite with LocalClusterSparkContext {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org