You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/01/21 18:48:45 UTC

spark git commit: [MLlib] [SPARK-5301] Missing conversions and operations on IndexedRowMatrix and CoordinateMatrix

Repository: spark
Updated Branches:
  refs/heads/master 2eeada373 -> aa1e22b17


[MLlib] [SPARK-5301] Missing conversions and operations on IndexedRowMatrix and CoordinateMatrix

* Transpose is missing from CoordinateMatrix (this is cheap to compute, so it should be there)
* IndexedRowMatrix should be convertable to CoordinateMatrix (conversion added)

Tests for both added.

Author: Reza Zadeh <re...@databricks.com>

Closes #4089 from rezazadeh/matutils and squashes the following commits:

ec5238b [Reza Zadeh] Array -> Iterator to avoid temp array
3ce0b5d [Reza Zadeh] Array -> Iterator
bbc907a [Reza Zadeh] Use 'i' for index, and zipWithIndex
cb10ae5 [Reza Zadeh] remove unnecessary import
a7ae048 [Reza Zadeh] Missing linear algebra utilities


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aa1e22b1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aa1e22b1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aa1e22b1

Branch: refs/heads/master
Commit: aa1e22b17b4ce885febe6970a2451c7d17d0acfb
Parents: 2eeada3
Author: Reza Zadeh <re...@databricks.com>
Authored: Wed Jan 21 09:48:38 2015 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Wed Jan 21 09:48:38 2015 -0800

----------------------------------------------------------------------
 .../linalg/distributed/CoordinateMatrix.scala      |  5 +++++
 .../linalg/distributed/IndexedRowMatrix.scala      | 17 +++++++++++++++++
 .../linalg/distributed/CoordinateMatrixSuite.scala |  5 +++++
 .../linalg/distributed/IndexedRowMatrixSuite.scala |  8 ++++++++
 4 files changed, 35 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/aa1e22b1/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
index 06d8915..b60559c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -69,6 +69,11 @@ class CoordinateMatrix(
     nRows
   }
 
+  /** Transposes this CoordinateMatrix. */
+  def transpose(): CoordinateMatrix = {
+    new CoordinateMatrix(entries.map(x => MatrixEntry(x.j, x.i, x.value)), numCols(), numRows())
+  }
+
   /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
   def toIndexedRowMatrix(): IndexedRowMatrix = {
     val nl = numCols()

http://git-wip-us.apache.org/repos/asf/spark/blob/aa1e22b1/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index 181f507..c518271 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -76,6 +76,23 @@ class IndexedRowMatrix(
   }
 
   /**
+   * Converts this matrix to a
+   * [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]].
+   */
+  def toCoordinateMatrix(): CoordinateMatrix = {
+    val entries = rows.flatMap { row =>
+      val rowIndex = row.index
+      row.vector match {
+        case SparseVector(size, indices, values) =>
+          Iterator.tabulate(indices.size)(i => MatrixEntry(rowIndex, indices(i), values(i)))
+        case DenseVector(values) =>
+          Iterator.tabulate(values.size)(i => MatrixEntry(rowIndex, i, values(i)))
+      }
+    }
+    new CoordinateMatrix(entries, numRows(), numCols())
+  }
+
+  /**
    * Computes the singular value decomposition of this IndexedRowMatrix.
    * Denote this matrix by A (m x n), this will compute matrices U, S, V such that A = U * S * V'.
    *

http://git-wip-us.apache.org/repos/asf/spark/blob/aa1e22b1/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
index f870975..80bef81 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
@@ -73,6 +73,11 @@ class CoordinateMatrixSuite extends FunSuite with MLlibTestSparkContext {
     assert(mat.toBreeze() === expected)
   }
 
+  test("transpose") {
+    val transposed = mat.transpose()
+    assert(mat.toBreeze().t === transposed.toBreeze())
+  }
+
   test("toIndexedRowMatrix") {
     val indexedRowMatrix = mat.toIndexedRowMatrix()
     val expected = BDM(

http://git-wip-us.apache.org/repos/asf/spark/blob/aa1e22b1/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
index 741cd49..b86c2ca 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
@@ -80,6 +80,14 @@ class IndexedRowMatrixSuite extends FunSuite with MLlibTestSparkContext {
     assert(rowMat.rows.collect().toSeq === data.map(_.vector).toSeq)
   }
 
+  test("toCoordinateMatrix") {
+    val idxRowMat = new IndexedRowMatrix(indexedRows)
+    val coordMat = idxRowMat.toCoordinateMatrix()
+    assert(coordMat.numRows() === m)
+    assert(coordMat.numCols() === n)
+    assert(coordMat.toBreeze() === idxRowMat.toBreeze())
+  }
+
   test("multiply a local matrix") {
     val A = new IndexedRowMatrix(indexedRows)
     val B = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org