You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/07/02 20:28:17 UTC

spark git commit: [SPARK-8479] [MLLIB] Add numNonzeros and numActives to linalg.Matrices

Repository: spark
Updated Branches:
  refs/heads/master 2e2f32603 -> 34d448dbe


[SPARK-8479] [MLLIB] Add numNonzeros and numActives to linalg.Matrices

Matrices allow zeros to be stored in values. Sometimes a method is handy to check if the numNonZeros are same as number of Active values.

Author: MechCoder <ma...@gmail.com>

Closes #6904 from MechCoder/nnz_matrix and squashes the following commits:

252c6b7 [MechCoder] Add to MiMa excludes
e2390f5 [MechCoder] Use count instead of foreach
2f62b2f [MechCoder] Add to MiMa excludes
d6e96ef [MechCoder] [SPARK-8479] Add numNonzeros and numActives to linalg.Matrices


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/34d448db
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/34d448db
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/34d448db

Branch: refs/heads/master
Commit: 34d448dbe1d7bd5bf9a8d6ef473878e570ca6161
Parents: 2e2f326
Author: MechCoder <ma...@gmail.com>
Authored: Thu Jul 2 11:28:14 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Thu Jul 2 11:28:14 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/linalg/Matrices.scala | 19 +++++++++++++++++++
 .../spark/mllib/linalg/MatricesSuite.scala       | 10 ++++++++++
 project/MimaExcludes.scala                       |  6 ++++++
 3 files changed, 35 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/34d448db/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 0a61549..75e7004 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -114,6 +114,16 @@ sealed trait Matrix extends Serializable {
    *          corresponding value in the matrix with type `Double`.
    */
   private[spark] def foreachActive(f: (Int, Int, Double) => Unit)
+
+  /**
+   * Find the number of non-zero active values.
+   */
+  def numNonzeros: Int
+
+  /**
+   * Find the number of values stored explicitly. These values can be zero as well.
+   */
+  def numActives: Int
 }
 
 @DeveloperApi
@@ -324,6 +334,10 @@ class DenseMatrix(
     }
   }
 
+  override def numNonzeros: Int = values.count(_ != 0)
+
+  override def numActives: Int = values.length
+
   /**
    * Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
    * set to false.
@@ -593,6 +607,11 @@ class SparseMatrix(
   def toDense: DenseMatrix = {
     new DenseMatrix(numRows, numCols, toArray)
   }
+
+  override def numNonzeros: Int = values.count(_ != 0)
+
+  override def numActives: Int = values.length
+
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/34d448db/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
index 8dbb70f..a270ba2 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
@@ -455,4 +455,14 @@ class MatricesSuite extends SparkFunSuite {
     lines = mat.toString(5, 100).lines.toArray
     assert(lines.size == 5 && lines.forall(_.size <= 100))
   }
+
+  test("numNonzeros and numActives") {
+    val dm1 = Matrices.dense(3, 2, Array(0, 0, -1, 1, 0, 1))
+    assert(dm1.numNonzeros === 3)
+    assert(dm1.numActives === 6)
+
+    val sm1 = Matrices.sparse(3, 2, Array(0, 2, 3), Array(0, 2, 1), Array(0.0, -1.2, 0.0))
+    assert(sm1.numNonzeros === 1)
+    assert(sm1.numActives === 3)
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/34d448db/project/MimaExcludes.scala
----------------------------------------------------------------------
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 6f86a50..680b699 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -75,6 +75,12 @@ object MimaExcludes {
               "org.apache.spark.sql.parquet.ParquetTypeInfo"),
             ProblemFilters.exclude[MissingClassProblem](
               "org.apache.spark.sql.parquet.ParquetTypeInfo$")
+          ) ++ Seq(
+            // SPARK-8479 Add numNonzeros and numActives to Matrix.
+            ProblemFilters.exclude[MissingMethodProblem](
+              "org.apache.spark.mllib.linalg.Matrix.numNonzeros"),
+            ProblemFilters.exclude[MissingMethodProblem](
+              "org.apache.spark.mllib.linalg.Matrix.numActives")
           )
         case v if v.startsWith("1.4") =>
           Seq(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org