You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/07/30 17:52:04 UTC
spark git commit: [SPARK-9225] [MLLIB] LDASuite needs unit tests for empty documents
Repository: spark
Updated Branches:
refs/heads/master 9c0501c5d -> a6e53a9c8
[SPARK-9225] [MLLIB] LDASuite needs unit tests for empty documents
Add unit tests for running LDA with empty documents.
Both EMLDAOptimizer and OnlineLDAOptimizer are tested.
feynmanliang
Author: Meihua Wu <me...@umich.edu>
Closes #7620 from rotationsymmetry/SPARK-9225 and squashes the following commits:
3ed7c88 [Meihua Wu] Incorporate reviewer's further comments
f9432e8 [Meihua Wu] Incorporate reviewer's comments
8e1b9ec [Meihua Wu] Merge remote-tracking branch 'upstream/master' into SPARK-9225
ad55665 [Meihua Wu] Add unit tests for running LDA with empty documents
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a6e53a9c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a6e53a9c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a6e53a9c
Branch: refs/heads/master
Commit: a6e53a9c8b24326d1b6dca7a0e36ce6c643daa77
Parents: 9c0501c
Author: Meihua Wu <me...@umich.edu>
Authored: Thu Jul 30 08:52:01 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Thu Jul 30 08:52:01 2015 -0700
----------------------------------------------------------------------
.../spark/mllib/clustering/LDASuite.scala | 40 ++++++++++++++++++++
1 file changed, 40 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/a6e53a9c/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
index b91c7ce..61d2edf 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -390,6 +390,46 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
}
}
+ test("EMLDAOptimizer with empty docs") {
+ val vocabSize = 6
+ val emptyDocsArray = Array.fill(6)(Vectors.sparse(vocabSize, Array.empty, Array.empty))
+ val emptyDocs = emptyDocsArray
+ .zipWithIndex.map { case (wordCounts, docId) =>
+ (docId.toLong, wordCounts)
+ }
+ val distributedEmptyDocs = sc.parallelize(emptyDocs, 2)
+
+ val op = new EMLDAOptimizer()
+ val lda = new LDA()
+ .setK(3)
+ .setMaxIterations(5)
+ .setSeed(12345)
+ .setOptimizer(op)
+
+ val model = lda.run(distributedEmptyDocs)
+ assert(model.vocabSize === vocabSize)
+ }
+
+ test("OnlineLDAOptimizer with empty docs") {
+ val vocabSize = 6
+ val emptyDocsArray = Array.fill(6)(Vectors.sparse(vocabSize, Array.empty, Array.empty))
+ val emptyDocs = emptyDocsArray
+ .zipWithIndex.map { case (wordCounts, docId) =>
+ (docId.toLong, wordCounts)
+ }
+ val distributedEmptyDocs = sc.parallelize(emptyDocs, 2)
+
+ val op = new OnlineLDAOptimizer()
+ val lda = new LDA()
+ .setK(3)
+ .setMaxIterations(5)
+ .setSeed(12345)
+ .setOptimizer(op)
+
+ val model = lda.run(distributedEmptyDocs)
+ assert(model.vocabSize === vocabSize)
+ }
+
}
private[clustering] object LDASuite {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org