You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jk...@apache.org on 2015/07/24 19:56:52 UTC

spark git commit: [SPARK-9222] [MLlib] Make class instantiation variables in DistributedLDAModel private[clustering]

Repository: spark
Updated Branches:
  refs/heads/master c2b50d693 -> e25312451


[SPARK-9222] [MLlib] Make class instantiation variables in DistributedLDAModel private[clustering]

This makes it easier to test all the class variables of DistributedLDAModel.

Author: MechCoder <ma...@gmail.com>

Closes #7573 from MechCoder/lda_test and squashes the following commits:

2f1a293 [MechCoder] [SPARK-9222] [MLlib] Make class instantiation variables in DistributedLDAModel private[clustering]


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e2531245
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e2531245
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e2531245

Branch: refs/heads/master
Commit: e25312451322969ad716dddf8248b8c17f68323b
Parents: c2b50d6
Author: MechCoder <ma...@gmail.com>
Authored: Fri Jul 24 10:56:48 2015 -0700
Committer: Joseph K. Bradley <jo...@databricks.com>
Committed: Fri Jul 24 10:56:48 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/clustering/LDAModel.scala |  8 ++++----
 .../org/apache/spark/mllib/clustering/LDASuite.scala | 15 +++++++++++++++
 2 files changed, 19 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e2531245/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 920b577..31c1d52 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -283,12 +283,12 @@ object LocalLDAModel extends Loader[LocalLDAModel] {
  */
 @Experimental
 class DistributedLDAModel private (
-    private val graph: Graph[LDA.TopicCounts, LDA.TokenCount],
-    private val globalTopicTotals: LDA.TopicCounts,
+    private[clustering] val graph: Graph[LDA.TopicCounts, LDA.TokenCount],
+    private[clustering] val globalTopicTotals: LDA.TopicCounts,
     val k: Int,
     val vocabSize: Int,
-    private val docConcentration: Double,
-    private val topicConcentration: Double,
+    private[clustering] val docConcentration: Double,
+    private[clustering] val topicConcentration: Double,
     private[spark] val iterationTimes: Array[Double]) extends LDAModel {
 
   import LDA._

http://git-wip-us.apache.org/repos/asf/spark/blob/e2531245/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
index da70d9b..376a87f 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.mllib.clustering
 import breeze.linalg.{DenseMatrix => BDM}
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.graphx.Edge
 import org.apache.spark.mllib.linalg.{DenseMatrix, Matrix, Vector, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.mllib.util.TestingUtils._
@@ -318,6 +319,20 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
       assert(distributedModel.k === sameDistributedModel.k)
       assert(distributedModel.vocabSize === sameDistributedModel.vocabSize)
       assert(distributedModel.iterationTimes === sameDistributedModel.iterationTimes)
+      assert(distributedModel.docConcentration === sameDistributedModel.docConcentration)
+      assert(distributedModel.topicConcentration === sameDistributedModel.topicConcentration)
+      assert(distributedModel.globalTopicTotals === sameDistributedModel.globalTopicTotals)
+
+      val graph = distributedModel.graph
+      val sameGraph = sameDistributedModel.graph
+      assert(graph.vertices.sortByKey().collect() === sameGraph.vertices.sortByKey().collect())
+      val edge = graph.edges.map {
+        case Edge(sid: Long, did: Long, nos: Double) => (sid, did, nos)
+      }.sortBy(x => (x._1, x._2)).collect()
+      val sameEdge = sameGraph.edges.map {
+        case Edge(sid: Long, did: Long, nos: Double) => (sid, did, nos)
+      }.sortBy(x => (x._1, x._2)).collect()
+      assert(edge === sameEdge)
     } finally {
       Utils.deleteRecursively(tempDir1)
       Utils.deleteRecursively(tempDir2)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org