You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/07/02 20:28:17 UTC
spark git commit: [SPARK-8479] [MLLIB] Add numNonzeros and numActives
to linalg.Matrices
Repository: spark
Updated Branches:
refs/heads/master 2e2f32603 -> 34d448dbe
[SPARK-8479] [MLLIB] Add numNonzeros and numActives to linalg.Matrices
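Matrices allow zeros to be stored explicitly in their values. It is sometimes handy to have a method to check whether the number of nonzero values (numNonzeros) is the same as the number of active, i.e. explicitly stored, values (numActives).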
Author: MechCoder <ma...@gmail.com>
Closes #6904 from MechCoder/nnz_matrix and squashes the following commits:
252c6b7 [MechCoder] Add to MiMa excludes
e2390f5 [MechCoder] Use count instead of foreach
2f62b2f [MechCoder] Add to MiMa excludes
d6e96ef [MechCoder] [SPARK-8479] Add numNonzeros and numActives to linalg.Matrices
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/34d448db
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/34d448db
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/34d448db
Branch: refs/heads/master
Commit: 34d448dbe1d7bd5bf9a8d6ef473878e570ca6161
Parents: 2e2f326
Author: MechCoder <ma...@gmail.com>
Authored: Thu Jul 2 11:28:14 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Thu Jul 2 11:28:14 2015 -0700
----------------------------------------------------------------------
.../org/apache/spark/mllib/linalg/Matrices.scala | 19 +++++++++++++++++++
.../spark/mllib/linalg/MatricesSuite.scala | 10 ++++++++++
project/MimaExcludes.scala | 6 ++++++
3 files changed, 35 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/34d448db/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 0a61549..75e7004 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -114,6 +114,16 @@ sealed trait Matrix extends Serializable {
* corresponding value in the matrix with type `Double`.
*/
private[spark] def foreachActive(f: (Int, Int, Double) => Unit)
+
+ /**
+ * Find the number of non-zero active values.
+ */
+ def numNonzeros: Int
+
+ /**
+ * Find the number of values stored explicitly. These values can be zero as well.
+ */
+ def numActives: Int
}
@DeveloperApi
@@ -324,6 +334,10 @@ class DenseMatrix(
}
}
+ override def numNonzeros: Int = values.count(_ != 0)
+
+ override def numActives: Int = values.length
+
/**
* Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
* set to false.
@@ -593,6 +607,11 @@ class SparseMatrix(
def toDense: DenseMatrix = {
new DenseMatrix(numRows, numCols, toArray)
}
+
+ override def numNonzeros: Int = values.count(_ != 0)
+
+ override def numActives: Int = values.length
+
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/34d448db/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
index 8dbb70f..a270ba2 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
@@ -455,4 +455,14 @@ class MatricesSuite extends SparkFunSuite {
lines = mat.toString(5, 100).lines.toArray
assert(lines.size == 5 && lines.forall(_.size <= 100))
}
+
+ test("numNonzeros and numActives") {
+ val dm1 = Matrices.dense(3, 2, Array(0, 0, -1, 1, 0, 1))
+ assert(dm1.numNonzeros === 3)
+ assert(dm1.numActives === 6)
+
+ val sm1 = Matrices.sparse(3, 2, Array(0, 2, 3), Array(0, 2, 1), Array(0.0, -1.2, 0.0))
+ assert(sm1.numNonzeros === 1)
+ assert(sm1.numActives === 3)
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/34d448db/project/MimaExcludes.scala
----------------------------------------------------------------------
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 6f86a50..680b699 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -75,6 +75,12 @@ object MimaExcludes {
"org.apache.spark.sql.parquet.ParquetTypeInfo"),
ProblemFilters.exclude[MissingClassProblem](
"org.apache.spark.sql.parquet.ParquetTypeInfo$")
+ ) ++ Seq(
+ // SPARK-8479 Add numNonzeros and numActives to Matrix.
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.mllib.linalg.Matrix.numNonzeros"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.mllib.linalg.Matrix.numActives")
)
case v if v.startsWith("1.4") =>
Seq(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org