You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2014/07/07 22:16:39 UTC

git commit: MAHOUT-1529: third collection of various edits against private branch

Repository: mahout
Updated Branches:
  refs/heads/master 63cebf76e -> e4ba7887f


MAHOUT-1529: third collection of various edits against private branch

Squashed commit of the following:

commit 4328aae135bc56c02b944c21af2cfd6629b262bf
Author: Dmitriy Lyubimov <dl...@apache.org>
Date:   Mon Jul 7 13:13:26 2014 -0700

    Various 1529-related stuff collected from private fork


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/e4ba7887
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/e4ba7887
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/e4ba7887

Branch: refs/heads/master
Commit: e4ba7887fc6dbf17c3d73f8d4aa1045eeb48d53e
Parents: 63cebf7
Author: Dmitriy Lyubimov <dl...@apache.org>
Authored: Mon Jul 7 13:15:30 2014 -0700
Committer: Dmitriy Lyubimov <dl...@apache.org>
Committed: Mon Jul 7 13:15:30 2014 -0700

----------------------------------------------------------------------
 .../apache/mahout/math/decompositions/ALS.scala | 14 +++++++-----
 .../mahout/math/decompositions/package.scala    | 10 +++++++--
 .../mahout/math/scalabindings/MatrixOps.scala   |  9 ++++----
 .../mahout/math/scalabindings/RLikeOps.scala    |  3 +++
 .../mahout/math/scalabindings/VectorOps.scala   | 23 +++++++++++++++++---
 .../mahout/math/scalabindings/package.scala     | 15 +++++++++++--
 .../mahout/sparkbindings/SparkEngine.scala      |  2 +-
 .../mahout/sparkbindings/blas/package.scala     |  2 +-
 .../apache/mahout/sparkbindings/package.scala   |  9 +++++---
 .../mahout/math/decompositions/MathSuite.scala  | 10 ++++-----
 10 files changed, 71 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
index 5103e1c..5aed649 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
@@ -29,7 +29,7 @@ import math._
 import org.apache.mahout.common.RandomUtils
 
 /** Simple ALS factorization algotithm. To solve, use train() method. */
-object ALS {
+private[math] object ALS {
 
   private val log = Logger.getLogger(ALS.getClass)
 
@@ -46,8 +46,13 @@ object ALS {
     def toTuple = (drmU, drmV, iterationsRMSE)
   }
 
+  /** Result class for in-core results */
+  class InCoreResult(val inCoreU: Matrix, inCoreV: Matrix, val iterationsRMSE: Iterable[Double]) {
+    def toTuple = (inCoreU, inCoreV, iterationsRMSE)
+  }
+
   /**
-   * Run ALS.
+   * Run Distributed ALS.
    * <P>
    *
    * Example:
@@ -69,7 +74,7 @@ object ALS {
    * @tparam K row key type of the input (100 is probably more than enough)
    * @return { @link org.apache.mahout.math.drm.decompositions.ALS.Result}
    */
-  def als[K: ClassTag](
+  def dals[K: ClassTag](
       drmInput: DrmLike[K],
       k: Int = 50,
       lambda: Double = 0.0,
@@ -80,7 +85,6 @@ object ALS {
     assert(convergenceThreshold < 1.0, "convergenceThreshold")
     assert(maxIterations >= 1, "maxIterations")
 
-
     val drmA = drmInput
     val drmAt = drmInput.t
 
@@ -101,7 +105,7 @@ object ALS {
     while (!stop && i < maxIterations) {
 
       // Alternate. This is really what ALS is.
-      if ( drmV != null) drmV.uncache()
+      if (drmV != null) drmV.uncache()
       drmV = (drmAt %*% drmU %*% solve(drmU.t %*% drmU -: diag(lambda, k))).checkpoint()
 
       drmU.uncache()

http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
index 852a977..1ed9695 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
@@ -92,6 +92,12 @@ package object decompositions {
   def dspca[K: ClassTag](A: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
   (DrmLike[K], DrmLike[Int], Vector) = DSPCA.dspca(A, k, p, q)
 
+  /** Result for distributed ALS-type two-component factorization algorithms */
+  type FactorizationResult[K] = ALS.Result[K]
+
+  /** Result for distributed ALS-type two-component factorization algorithms, in-core matrices */
+  type FactorizationResultInCore = ALS.InCoreResult
+  
   /**
    * Run ALS.
    * <P>
@@ -121,7 +127,7 @@ package object decompositions {
       lambda: Double = 0.0,
       maxIterations: Int = 10,
       convergenceThreshold: Double = 0.10
-      ): ALS.Result[K] =
-    ALS.als(drmInput, k, lambda, maxIterations, convergenceThreshold)
+      ): FactorizationResult[K] =
+    ALS.dals(drmInput, k, lambda, maxIterations, convergenceThreshold)
 
 }

http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
index 28acc5a..bb77ae1 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
@@ -19,7 +19,7 @@ package org.apache.mahout.math.scalabindings
 
 import org.apache.mahout.math.{Matrices, QRDecomposition, Vector, Matrix}
 import scala.collection.JavaConversions._
-import org.apache.mahout.math.function.{VectorFunction, DoubleFunction, Functions}
+import org.apache.mahout.math.function.{DoubleDoubleFunction, VectorFunction, DoubleFunction, Functions}
 import scala.math._
 
 class MatrixOps(val m: Matrix) {
@@ -43,9 +43,10 @@ class MatrixOps(val m: Matrix) {
 
   def -=(that: Double) = +=(-that)
 
-  def -=:(that: Double) = m.assign(new DoubleFunction {
-    def apply(x: Double): Double = that - x
-  })
+  def -=:(that: Double) = m.assign(Functions.minus(that))
+
+  /** A := B - A which is -(A - B) */
+  def -=:(that: Matrix) = m.assign(that, Functions.chain(Functions.NEGATE, Functions.MINUS))
 
   def +(that: Matrix) = cloned += that
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
index e12fc0f..c96526f 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
@@ -26,8 +26,11 @@ object RLikeOps {
 
   implicit def v2vOps(v: Vector) = new RLikeVectorOps(v)
 
+  implicit def el2elOps(el: Vector.Element) = new ElementOps(el)
+
   implicit def times2timesOps(m: MatrixTimesOps) = new RLikeTimesOps(m)
 
   implicit def m2mOps(m: Matrix) = new RLikeMatrixOps(m)
 
+
 }

http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala
index 0a81bcd..c1b5a69 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala
@@ -25,7 +25,7 @@ import org.apache.mahout.math.function.Functions
  * Syntactic sugar for mahout vectors
  * @param v Mahout vector
  */
-class VectorOps(val v: Vector) {
+class VectorOps(private[scalabindings] val v: Vector) {
 
   import RLikeOps._
 
@@ -118,5 +118,22 @@ class VectorOps(val v: Vector) {
 
 }
 
-object VectorOps {
-}
+class ElementOps(private[scalabindings] val el: Vector.Element) {
+
+  def apply = el.get()
+
+  def update(v: Double) = el.set(v)
+
+  def :=(v: Double) = el.set(v)
+
+  def +(that: Double) = el.get() + that
+
+  def -(that: Double) = el.get() - that
+
+  def :-(that: Double) = that - el.get()
+
+  def /(that: Double) = el.get() / that
+
+  def :/(that: Double) = that / el.get()
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
index 2b7773b..8e0c07f 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
@@ -130,7 +130,16 @@ package object scalabindings {
         case t: Product => t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray
         case t: Vector => Array.tabulate(t.length)(t(_))
         case t: Array[Double] => t
-        case t: Iterable[Double] => t.toArray
+        case t: Iterable[_] =>
+          t.head match {
+            case ss: Double => t.asInstanceOf[Iterable[Double]].toArray
+            case vv: Vector =>
+              val m = new DenseMatrix(t.size, t.head.asInstanceOf[Vector].length)
+              t.asInstanceOf[Iterable[Vector]].view.zipWithIndex.foreach {
+                case (v, idx) => m(idx, ::) := v
+              }
+              return m
+          }
         case t: Array[Array[Double]] => if (rows.size == 1)
           return new DenseMatrix(t)
         else
@@ -138,7 +147,9 @@ package object scalabindings {
             "double[][] data parameter can be the only argument for dense()")
         case t:Array[Vector] =>
           val m = new DenseMatrix(t.size,t.head.length)
-          t.view.zipWithIndex.foreach({case(v,idx) => m(idx,::) := v})
+          t.view.zipWithIndex.foreach{
+            case(v,idx) => m(idx,::) := v
+          }
           return m
         case _ => throw new IllegalArgumentException("unsupported type in the inline Matrix initializer")
       }

http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
index dbdc934..b68a98e 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
@@ -219,7 +219,7 @@ object SparkEngine extends DistributedEngine {
     new CheckpointedDrmSpark[Long](rdd, nrow, ncol)
   }
 
-  private def cacheHint2Spark(cacheHint: CacheHint.CacheHint): StorageLevel = cacheHint match {
+  private[mahout] def cacheHint2Spark(cacheHint: CacheHint.CacheHint): StorageLevel = cacheHint match {
     case CacheHint.NONE => StorageLevel.NONE
     case CacheHint.DISK_ONLY => StorageLevel.DISK_ONLY
     case CacheHint.DISK_ONLY_2 => StorageLevel.DISK_ONLY_2

http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala
index d2d5340..32d6fb5 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala
@@ -20,7 +20,7 @@ package org.apache.mahout.sparkbindings
 import scala.reflect.ClassTag
 
 /**
- * This package contains distributed algorithms that distributed matrix expression optimizer picks
+ * This package contains distributed algorithms that the distributed matrix expression optimizer picks
  * from.
  */
 package object blas {

http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala
index e9fd7ac..8726766 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala
@@ -52,7 +52,9 @@ package object sparkbindings {
    * @param customJars
    * @return
    */
-  def mahoutSparkContext(masterUrl: String, appName: String,
+  def mahoutSparkContext(
+      masterUrl: String,
+      appName: String,
       customJars: TraversableOnce[String] = Nil,
       sparkConf: SparkConf = new SparkConf(),
       addMahoutJars: Boolean = true
@@ -177,13 +179,14 @@ package object sparkbindings {
   def drmWrap[K : ClassTag](
       rdd: DrmRdd[K],
       nrow: Int = -1,
-      ncol: Int = -1
+      ncol: Int = -1,
+      cacheHint:CacheHint.CacheHint = CacheHint.NONE
       ): CheckpointedDrm[K] =
     new CheckpointedDrmSpark[K](
       rdd = rdd,
       _nrow = nrow,
       _ncol = ncol,
-      _cacheStorageLevel = StorageLevel.NONE
+      _cacheStorageLevel = SparkEngine.cacheHint2Spark(cacheHint)
     )
 
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala b/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala
index 6060dfd..03c7190 100644
--- a/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala
@@ -17,8 +17,6 @@
 
 package org.apache.mahout.math.decompositions
 
-import org.scalatest.{Matchers, FunSuite}
-import org.apache.mahout.sparkbindings.test.MahoutLocalContext
 import org.apache.mahout.math._
 import drm._
 import scalabindings._
@@ -27,6 +25,8 @@ import RLikeDrmOps._
 import org.apache.mahout.sparkbindings._
 import org.apache.mahout.common.RandomUtils
 import scala.math._
+import org.scalatest.{Matchers, FunSuite}
+import org.apache.mahout.sparkbindings.test.MahoutLocalContext
 
 class MathSuite extends FunSuite with Matchers with MahoutLocalContext {
 
@@ -171,7 +171,7 @@ class MathSuite extends FunSuite with Matchers with MahoutLocalContext {
 
   }
 
-  test("als") {
+  test("dals") {
 
     val rnd = RandomUtils.getRandom
 
@@ -202,8 +202,8 @@ class MathSuite extends FunSuite with Matchers with MahoutLocalContext {
     printf("ALS factorized approximation block:\n%s\n", predict(0 until 3, 0 until 3))
 
     val err = (inCoreA - predict).norm
-    printf ("norm of residuals %f\n",err)
-    printf ("train iteration rmses: %s\n", rmse)
+    printf("norm of residuals %f\n", err)
+    printf("train iteration rmses: %s\n", rmse)
 
     err should be < 1e-2