You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/01/21 18:48:45 UTC
spark git commit: [MLlib] [SPARK-5301] Missing conversions and
operations on IndexedRowMatrix and CoordinateMatrix
Repository: spark
Updated Branches:
refs/heads/master 2eeada373 -> aa1e22b17
[MLlib] [SPARK-5301] Missing conversions and operations on IndexedRowMatrix and CoordinateMatrix
* Transpose is missing from CoordinateMatrix (this is cheap to compute, so it should be there)
* IndexedRowMatrix should be convertible to CoordinateMatrix (conversion added)
Tests for both added.
Author: Reza Zadeh <re...@databricks.com>
Closes #4089 from rezazadeh/matutils and squashes the following commits:
ec5238b [Reza Zadeh] Array -> Iterator to avoid temp array
3ce0b5d [Reza Zadeh] Array -> Iterator
bbc907a [Reza Zadeh] Use 'i' for index, and zipWithIndex
cb10ae5 [Reza Zadeh] remove unnecessary import
a7ae048 [Reza Zadeh] Missing linear algebra utilities
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aa1e22b1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aa1e22b1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aa1e22b1
Branch: refs/heads/master
Commit: aa1e22b17b4ce885febe6970a2451c7d17d0acfb
Parents: 2eeada3
Author: Reza Zadeh <re...@databricks.com>
Authored: Wed Jan 21 09:48:38 2015 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Wed Jan 21 09:48:38 2015 -0800
----------------------------------------------------------------------
.../linalg/distributed/CoordinateMatrix.scala | 5 +++++
.../linalg/distributed/IndexedRowMatrix.scala | 17 +++++++++++++++++
.../linalg/distributed/CoordinateMatrixSuite.scala | 5 +++++
.../linalg/distributed/IndexedRowMatrixSuite.scala | 8 ++++++++
4 files changed, 35 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/aa1e22b1/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
index 06d8915..b60559c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -69,6 +69,11 @@ class CoordinateMatrix(
nRows
}
+ /** Transposes this CoordinateMatrix: swaps each entry's (i, j) and the row/column counts. */
+ def transpose(): CoordinateMatrix = {
+ new CoordinateMatrix(entries.map(x => MatrixEntry(x.j, x.i, x.value)), numCols(), numRows())
+ }
+
/** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
def toIndexedRowMatrix(): IndexedRowMatrix = {
val nl = numCols()
http://git-wip-us.apache.org/repos/asf/spark/blob/aa1e22b1/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index 181f507..c518271 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -76,6 +76,23 @@ class IndexedRowMatrix(
}
/**
+ * Converts this matrix to a [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]]:
+ * one MatrixEntry per stored value (sparse rows) or per element, zeros included (dense rows).
+ */
+ def toCoordinateMatrix(): CoordinateMatrix = {
+ val entries = rows.flatMap { row =>
+ val rowIndex = row.index
+ row.vector match {
+ case SparseVector(size, indices, values) => // only the stored (index, value) pairs
+ Iterator.tabulate(indices.size)(i => MatrixEntry(rowIndex, indices(i), values(i)))
+ case DenseVector(values) => // every position of the row, whatever its value
+ Iterator.tabulate(values.size)(i => MatrixEntry(rowIndex, i, values(i)))
+ }
+ }
+ new CoordinateMatrix(entries, numRows(), numCols())
+ }
+
+ /**
* Computes the singular value decomposition of this IndexedRowMatrix.
* Denote this matrix by A (m x n), this will compute matrices U, S, V such that A = U * S * V'.
*
http://git-wip-us.apache.org/repos/asf/spark/blob/aa1e22b1/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
index f870975..80bef81 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala
@@ -73,6 +73,11 @@ class CoordinateMatrixSuite extends FunSuite with MLlibTestSparkContext {
assert(mat.toBreeze() === expected)
}
+ test("transpose") { // transpose() must agree with `.t` applied to the toBreeze() copy
+ val transposed = mat.transpose()
+ assert(mat.toBreeze().t === transposed.toBreeze())
+ }
+
test("toIndexedRowMatrix") {
val indexedRowMatrix = mat.toIndexedRowMatrix()
val expected = BDM(
http://git-wip-us.apache.org/repos/asf/spark/blob/aa1e22b1/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
index 741cd49..b86c2ca 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
@@ -80,6 +80,14 @@ class IndexedRowMatrixSuite extends FunSuite with MLlibTestSparkContext {
assert(rowMat.rows.collect().toSeq === data.map(_.vector).toSeq)
}
+ test("toCoordinateMatrix") { // the conversion must keep both the dimensions and the contents
+ val idxRowMat = new IndexedRowMatrix(indexedRows)
+ val coordMat = idxRowMat.toCoordinateMatrix()
+ assert(coordMat.numRows() === m) // m and n are defined outside this hunk
+ assert(coordMat.numCols() === n)
+ assert(coordMat.toBreeze() === idxRowMat.toBreeze()) // same matrix once both are made local
+ }
+
test("multiply a local matrix") {
val A = new IndexedRowMatrix(indexedRows)
val B = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org