You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2013/07/17 02:31:20 UTC
[23/50] [abbrv] git commit: Make number of blocks in ALS configurable
and lower the default
Make number of blocks in ALS configurable and lower the default
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/ed7fd501
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/ed7fd501
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/ed7fd501
Branch: refs/heads/master
Commit: ed7fd501cf7ece730cbdee6c152b917cf6bfb16a
Parents: 10c0593
Author: Matei Zaharia <ma...@gmail.com>
Authored: Mon Jul 15 00:30:10 2013 +0000
Committer: Matei Zaharia <ma...@gmail.com>
Committed: Mon Jul 15 00:30:10 2013 +0000
----------------------------------------------------------------------
mllib/src/main/scala/spark/mllib/recommendation/ALS.scala | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/ed7fd501/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
index 21eb212..2abaf2f 100644
--- a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
@@ -91,7 +91,7 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
*/
def train(ratings: RDD[(Int, Int, Double)]): MatrixFactorizationModel = {
val numBlocks = if (this.numBlocks == -1) {
- math.max(ratings.context.defaultParallelism, ratings.partitions.size)
+ math.max(ratings.context.defaultParallelism, ratings.partitions.size / 2)
} else {
this.numBlocks
}
@@ -384,12 +384,13 @@ object ALS {
}
def main(args: Array[String]) {
- if (args.length != 5) {
- println("Usage: ALS <master> <ratings_file> <rank> <iterations> <output_dir>")
+ if (args.length != 5 && args.length != 6) {
+ println("Usage: ALS <master> <ratings_file> <rank> <iterations> <output_dir> [<blocks>]")
System.exit(1)
}
val (master, ratingsFile, rank, iters, outputDir) =
(args(0), args(1), args(2).toInt, args(3).toInt, args(4))
+ val blocks = if (args.length == 6) args(5).toInt else -1
System.setProperty("spark.serializer", "spark.KryoSerializer")
System.setProperty("spark.locality.wait", "10000")
val sc = new SparkContext(master, "ALS")
@@ -397,7 +398,7 @@ object ALS {
val fields = line.split(',')
(fields(0).toInt, fields(1).toInt, fields(2).toDouble)
}
- val model = ALS.train(ratings, rank, iters)
+ val model = ALS.train(ratings, rank, iters, 0.01, blocks)
model.userFeatures.map{ case (id, vec) => id + "," + vec.mkString(" ") }
.saveAsTextFile(outputDir + "/userFeatures")
model.productFeatures.map{ case (id, vec) => id + "," + vec.mkString(" ") }