You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2014/01/23 19:48:31 UTC

[1/4] git commit: Replace commons-math with jblas

Updated Branches:
  refs/heads/master a1cd18512 -> a2b47dae6


Replace commons-math with jblas

Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/cc0fd331
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/cc0fd331
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/cc0fd331

Branch: refs/heads/master
Commit: cc0fd3317757eb745d0df8ba1510dda94cb5d655
Parents: 3184fac
Author: Jianping J Wang <ji...@gmail.com>
Authored: Thu Jan 23 19:44:30 2014 +0800
Committer: Jianping J Wang <ji...@gmail.com>
Committed: Thu Jan 23 19:44:30 2014 +0800

----------------------------------------------------------------------
 .../apache/spark/graphx/lib/SVDPlusPlus.scala   | 68 +++++++++++---------
 1 file changed, 36 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/cc0fd331/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
index 79280f8..ccd7de5 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.graphx.lib
 
 import scala.util.Random
-import org.apache.commons.math3.linear._
+import org.jblas.DoubleMatrix
 import org.apache.spark.rdd._
 import org.apache.spark.graphx._
 
@@ -52,15 +52,15 @@ object SVDPlusPlus {
    * @return a graph with vertex attributes containing the trained model
    */
   def run(edges: RDD[Edge[Double]], conf: Conf)
-    : (Graph[(RealVector, RealVector, Double, Double), Double], Double) =
+    : (Graph[(DoubleMatrix, DoubleMatrix, Double, Double), Double], Double) =
   {
     // Generate default vertex attribute
-    def defaultF(rank: Int): (RealVector, RealVector, Double, Double) = {
-      val v1 = new ArrayRealVector(rank)
-      val v2 = new ArrayRealVector(rank)
+    def defaultF(rank: Int): (DoubleMatrix, DoubleMatrix, Double, Double) = {
+      val v1 = new DoubleMatrix(rank)
+      val v2 = new DoubleMatrix(rank)
       for (i <- 0 until rank) {
-        v1.setEntry(i, Random.nextDouble())
-        v2.setEntry(i, Random.nextDouble())
+        v1.put(i, Random.nextDouble())
+        v2.put(i, Random.nextDouble())
       }
       (v1, v2, 0.0, 0.0)
     }
@@ -76,31 +76,32 @@ object SVDPlusPlus {
     // Calculate initial bias and norm
     val t0 = g.mapReduceTriplets(
       et => Iterator((et.srcId, (1L, et.attr)), (et.dstId, (1L, et.attr))),
-      (g1: (Long, Double), g2: (Long, Double)) => (g1._1 + g2._1, g1._2 + g2._2))
+        (g1: (Long, Double), g2: (Long, Double)) => (g1._1 + g2._1, g1._2 + g2._2))
 
     g = g.outerJoinVertices(t0) {
-      (vid: VertexId, vd: (RealVector, RealVector, Double, Double), msg: Option[(Long, Double)]) =>
+      (vid: VertexId, vd: (DoubleMatrix, DoubleMatrix, Double, Double),
+       msg: Option[(Long, Double)]) =>
         (vd._1, vd._2, msg.get._2 / msg.get._1, 1.0 / scala.math.sqrt(msg.get._1))
     }
 
     def mapTrainF(conf: Conf, u: Double)
-        (et: EdgeTriplet[(RealVector, RealVector, Double, Double), Double])
-      : Iterator[(VertexId, (RealVector, RealVector, Double))] = {
+        (et: EdgeTriplet[(DoubleMatrix, DoubleMatrix, Double, Double), Double])
+      : Iterator[(VertexId, (DoubleMatrix, DoubleMatrix, Double))] = {
       val (usr, itm) = (et.srcAttr, et.dstAttr)
       val (p, q) = (usr._1, itm._1)
-      var pred = u + usr._3 + itm._3 + q.dotProduct(usr._2)
+      var pred = u + usr._3 + itm._3 + q.dot(usr._2)
       pred = math.max(pred, conf.minVal)
       pred = math.min(pred, conf.maxVal)
       val err = et.attr - pred
-      val updateP = q.mapMultiply(err)
-        .subtract(p.mapMultiply(conf.gamma7))
-        .mapMultiply(conf.gamma2)
-      val updateQ = usr._2.mapMultiply(err)
-        .subtract(q.mapMultiply(conf.gamma7))
-        .mapMultiply(conf.gamma2)
-      val updateY = q.mapMultiply(err * usr._4)
-        .subtract(itm._2.mapMultiply(conf.gamma7))
-        .mapMultiply(conf.gamma2)
+      val updateP = q.mul(err)
+        .subColumnVector(p.mul(conf.gamma7))
+        .mul(conf.gamma2)
+      val updateQ = usr._2.mul(err)
+        .subColumnVector(q.mul(conf.gamma7))
+        .mul(conf.gamma2)
+      val updateY = q.mul(err * usr._4)
+        .subColumnVector(itm._2.mul(conf.gamma7))
+        .mul(conf.gamma2)
       Iterator((et.srcId, (updateP, updateY, (err - conf.gamma6 * usr._3) * conf.gamma1)),
         (et.dstId, (updateQ, updateY, (err - conf.gamma6 * itm._3) * conf.gamma1)))
     }
@@ -110,34 +111,37 @@ object SVDPlusPlus {
       g.cache()
       val t1 = g.mapReduceTriplets(
         et => Iterator((et.srcId, et.dstAttr._2)),
-        (g1: RealVector, g2: RealVector) => g1.add(g2))
+        (g1: DoubleMatrix, g2: DoubleMatrix) => g1.addColumnVector(g2))
       g = g.outerJoinVertices(t1) {
-        (vid: VertexId, vd: (RealVector, RealVector, Double, Double), msg: Option[RealVector]) =>
-          if (msg.isDefined) (vd._1, vd._1.add(msg.get.mapMultiply(vd._4)), vd._3, vd._4) else vd
+        (vid: VertexId, vd: (DoubleMatrix, DoubleMatrix, Double, Double),
+         msg: Option[DoubleMatrix]) =>
+          if (msg.isDefined) (vd._1, vd._1
+            .addColumnVector(msg.get.mul(vd._4)), vd._3, vd._4) else vd
       }
 
       // Phase 2, update p for user nodes and q, y for item nodes
       g.cache()
       val t2 = g.mapReduceTriplets(
         mapTrainF(conf, u),
-        (g1: (RealVector, RealVector, Double), g2: (RealVector, RealVector, Double)) =>
-          (g1._1.add(g2._1), g1._2.add(g2._2), g1._3 + g2._3))
+        (g1: (DoubleMatrix, DoubleMatrix, Double), g2: (DoubleMatrix, DoubleMatrix, Double)) =>
+          (g1._1.addColumnVector(g2._1), g1._2.addColumnVector(g2._2), g1._3 + g2._3))
       g = g.outerJoinVertices(t2) {
         (vid: VertexId,
-         vd: (RealVector, RealVector, Double, Double),
-         msg: Option[(RealVector, RealVector, Double)]) =>
-          (vd._1.add(msg.get._1), vd._2.add(msg.get._2), vd._3 + msg.get._3, vd._4)
+         vd: (DoubleMatrix, DoubleMatrix, Double, Double),
+         msg: Option[(DoubleMatrix, DoubleMatrix, Double)]) =>
+          (vd._1.addColumnVector(msg.get._1), vd._2.addColumnVector(msg.get._2),
+            vd._3 + msg.get._3, vd._4)
       }
     }
 
     // calculate error on training set
     def mapTestF(conf: Conf, u: Double)
-        (et: EdgeTriplet[(RealVector, RealVector, Double, Double), Double])
+        (et: EdgeTriplet[(DoubleMatrix, DoubleMatrix, Double, Double), Double])
       : Iterator[(VertexId, Double)] =
     {
       val (usr, itm) = (et.srcAttr, et.dstAttr)
       val (p, q) = (usr._1, itm._1)
-      var pred = u + usr._3 + itm._3 + q.dotProduct(usr._2)
+      var pred = u + usr._3 + itm._3 + q.dot(usr._2)
       pred = math.max(pred, conf.minVal)
       pred = math.min(pred, conf.maxVal)
       val err = (et.attr - pred) * (et.attr - pred)
@@ -146,7 +150,7 @@ object SVDPlusPlus {
     g.cache()
     val t3 = g.mapReduceTriplets(mapTestF(conf, u), (g1: Double, g2: Double) => g1 + g2)
     g = g.outerJoinVertices(t3) {
-      (vid: VertexId, vd: (RealVector, RealVector, Double, Double), msg: Option[Double]) =>
+      (vid: VertexId, vd: (DoubleMatrix, DoubleMatrix, Double, Double), msg: Option[Double]) =>
         if (msg.isDefined) (vd._1, vd._2, vd._3, msg.get) else vd
     }
 


[4/4] git commit: Merge pull request #499 from jianpingjwang/dev1

Posted by rx...@apache.org.
Merge pull request #499 from jianpingjwang/dev1

Replace commons-math with jblas in SVDPlusPlus


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/a2b47dae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/a2b47dae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/a2b47dae

Branch: refs/heads/master
Commit: a2b47dae66a437f02bc053e9bde5c1472cff0fc6
Parents: a1cd185 19a01c1
Author: Reynold Xin <rx...@apache.org>
Authored: Thu Jan 23 10:48:26 2014 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Thu Jan 23 10:48:26 2014 -0800

----------------------------------------------------------------------
 graphx/pom.xml                                  |  7 +-
 .../apache/spark/graphx/lib/SVDPlusPlus.scala   | 68 +++++++++++---------
 project/SparkBuild.scala                        |  2 +-
 3 files changed, 40 insertions(+), 37 deletions(-)
----------------------------------------------------------------------



[2/4] git commit: Add jblas dependency

Posted by rx...@apache.org.
Add jblas dependency

Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/a5a513e2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/a5a513e2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/a5a513e2

Branch: refs/heads/master
Commit: a5a513e25e0742400788afd975dd255face2eac4
Parents: cc0fd33
Author: Jianping J Wang <ji...@gmail.com>
Authored: Thu Jan 23 19:48:39 2014 +0800
Committer: Jianping J Wang <ji...@gmail.com>
Committed: Thu Jan 23 19:48:39 2014 +0800

----------------------------------------------------------------------
 graphx/pom.xml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a5a513e2/graphx/pom.xml
----------------------------------------------------------------------
diff --git a/graphx/pom.xml b/graphx/pom.xml
index d97dbb8..baa240a 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -38,15 +38,14 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-math3</artifactId>
-      <version>3.2</version>
+      <groupId>org.jblas</groupId>
+      <artifactId>jblas</artifactId>
+      <version>1.2.3</version>
     </dependency>
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-server</artifactId>
     </dependency>
-
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>


[3/4] git commit: Add jblas dependency

Posted by rx...@apache.org.
Add jblas dependency

Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/19a01c1b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/19a01c1b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/19a01c1b

Branch: refs/heads/master
Commit: 19a01c1b1d6ba5791337f51eae1909918d80cef4
Parents: a5a513e
Author: Jianping J Wang <ji...@gmail.com>
Authored: Thu Jan 23 19:54:01 2014 +0800
Committer: Jianping J Wang <ji...@gmail.com>
Committed: Thu Jan 23 19:54:01 2014 +0800

----------------------------------------------------------------------
 project/SparkBuild.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/19a01c1b/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 76e3973..46a1c64 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -318,7 +318,7 @@ object SparkBuild extends Build {
   def graphxSettings = sharedSettings ++ Seq(
     name := "spark-graphx",
     libraryDependencies ++= Seq(
-      "org.apache.commons" % "commons-math3" % "3.2"
+      "org.jblas" % "jblas" % "1.2.3"
     )
   )