You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2014/07/07 22:16:39 UTC
git commit: MAHOUT-1529: third collection of various edits against
private branch
Repository: mahout
Updated Branches:
refs/heads/master 63cebf76e -> e4ba7887f
MAHOUT-1529: third collection of various edits against private branch
Squashed commit of the following:
commit 4328aae135bc56c02b944c21af2cfd6629b262bf
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Mon Jul 7 13:13:26 2014 -0700
Various 1529-related stuff collected from private fork
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/e4ba7887
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/e4ba7887
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/e4ba7887
Branch: refs/heads/master
Commit: e4ba7887fc6dbf17c3d73f8d4aa1045eeb48d53e
Parents: 63cebf7
Author: Dmitriy Lyubimov <dl...@apache.org>
Authored: Mon Jul 7 13:15:30 2014 -0700
Committer: Dmitriy Lyubimov <dl...@apache.org>
Committed: Mon Jul 7 13:15:30 2014 -0700
----------------------------------------------------------------------
.../apache/mahout/math/decompositions/ALS.scala | 14 +++++++-----
.../mahout/math/decompositions/package.scala | 10 +++++++--
.../mahout/math/scalabindings/MatrixOps.scala | 9 ++++----
.../mahout/math/scalabindings/RLikeOps.scala | 3 +++
.../mahout/math/scalabindings/VectorOps.scala | 23 +++++++++++++++++---
.../mahout/math/scalabindings/package.scala | 15 +++++++++++--
.../mahout/sparkbindings/SparkEngine.scala | 2 +-
.../mahout/sparkbindings/blas/package.scala | 2 +-
.../apache/mahout/sparkbindings/package.scala | 9 +++++---
.../mahout/math/decompositions/MathSuite.scala | 10 ++++-----
10 files changed, 71 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
index 5103e1c..5aed649 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
@@ -29,7 +29,7 @@ import math._
import org.apache.mahout.common.RandomUtils
/** Simple ALS factorization algotithm. To solve, use train() method. */
-object ALS {
+private[math] object ALS {
private val log = Logger.getLogger(ALS.getClass)
@@ -46,8 +46,13 @@ object ALS {
def toTuple = (drmU, drmV, iterationsRMSE)
}
+ /** Result class for in-core results; both factor matrices and the RMSE history are public members. */
+ class InCoreResult(val inCoreU: Matrix, val inCoreV: Matrix, val iterationsRMSE: Iterable[Double]) {
+   def toTuple = (inCoreU, inCoreV, iterationsRMSE) // same three members, packed as a tuple
+ }
+
/**
- * Run ALS.
+ * Run Distributed ALS.
* <P>
*
* Example:
@@ -69,7 +74,7 @@ object ALS {
* @tparam K row key type of the input (100 is probably more than enough)
* @return { @link org.apache.mahout.math.drm.decompositions.ALS.Result}
*/
- def als[K: ClassTag](
+ def dals[K: ClassTag](
drmInput: DrmLike[K],
k: Int = 50,
lambda: Double = 0.0,
@@ -80,7 +85,6 @@ object ALS {
assert(convergenceThreshold < 1.0, "convergenceThreshold")
assert(maxIterations >= 1, "maxIterations")
-
val drmA = drmInput
val drmAt = drmInput.t
@@ -101,7 +105,7 @@ object ALS {
while (!stop && i < maxIterations) {
// Alternate. This is really what ALS is.
- if ( drmV != null) drmV.uncache()
+ if (drmV != null) drmV.uncache()
drmV = (drmAt %*% drmU %*% solve(drmU.t %*% drmU -: diag(lambda, k))).checkpoint()
drmU.uncache()
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
index 852a977..1ed9695 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
@@ -92,6 +92,12 @@ package object decompositions {
def dspca[K: ClassTag](A: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
(DrmLike[K], DrmLike[Int], Vector) = DSPCA.dspca(A, k, p, q)
+ /** Result for distributed ALS-type two-component factorization algorithms */
+ type FactorizationResult[K] = ALS.Result[K]
+
+ /** Result for distributed ALS-type two-component factorization algorithms, in-core matrices */
+ type FactorizationResultInCore = ALS.InCoreResult
+
/**
* Run ALS.
* <P>
@@ -121,7 +127,7 @@ package object decompositions {
lambda: Double = 0.0,
maxIterations: Int = 10,
convergenceThreshold: Double = 0.10
- ): ALS.Result[K] =
- ALS.als(drmInput, k, lambda, maxIterations, convergenceThreshold)
+ ): FactorizationResult[K] =
+ ALS.dals(drmInput, k, lambda, maxIterations, convergenceThreshold)
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
index 28acc5a..bb77ae1 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
@@ -19,7 +19,7 @@ package org.apache.mahout.math.scalabindings
import org.apache.mahout.math.{Matrices, QRDecomposition, Vector, Matrix}
import scala.collection.JavaConversions._
-import org.apache.mahout.math.function.{VectorFunction, DoubleFunction, Functions}
+import org.apache.mahout.math.function.{DoubleDoubleFunction, VectorFunction, DoubleFunction, Functions}
import scala.math._
class MatrixOps(val m: Matrix) {
@@ -43,9 +43,10 @@ class MatrixOps(val m: Matrix) {
def -=(that: Double) = +=(-that)
- def -=:(that: Double) = m.assign(new DoubleFunction {
- def apply(x: Double): Double = that - x
- })
+ /** A := that - A. Functions.minus(that) would compute A - that (operands reversed), so subtract explicitly. */
+ def -=:(that: Double) = m.assign(new DoubleFunction { def apply(x: Double): Double = that - x })
+ /** A := B - A which is -(A - B) */
+ def -=:(that: Matrix) = m.assign(that, Functions.chain(Functions.NEGATE, Functions.MINUS))
def +(that: Matrix) = cloned += that
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
index e12fc0f..c96526f 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
@@ -26,8 +26,11 @@ object RLikeOps {
implicit def v2vOps(v: Vector) = new RLikeVectorOps(v)
+ implicit def el2elOps(el: Vector.Element) = new ElementOps(el)
+
implicit def times2timesOps(m: MatrixTimesOps) = new RLikeTimesOps(m)
implicit def m2mOps(m: Matrix) = new RLikeMatrixOps(m)
+
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala
index 0a81bcd..c1b5a69 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala
@@ -25,7 +25,7 @@ import org.apache.mahout.math.function.Functions
* Syntactic sugar for mahout vectors
* @param v Mahout vector
*/
-class VectorOps(val v: Vector) {
+class VectorOps(private[scalabindings] val v: Vector) {
import RLikeOps._
@@ -118,5 +118,22 @@ class VectorOps(val v: Vector) {
}
-object VectorOps {
-}
+/** Syntactic sugar for a single vector element: read, assign, and arithmetic against a Double. */
+class ElementOps(private[scalabindings] val el: Vector.Element) {
+  /** Current value; declared with `()` so that `el()` resolves to this method. */
+  def apply(): Double = el.get()
+  /** Stores `v` in the element (target of `el() = v`); `:=` does the same. */
+  def update(v: Double): Unit = el.set(v)
+  def :=(v: Double): Unit = el.set(v)
+  /** element + that */
+  def +(that: Double): Double = el.get() + that
+  /** element - that */
+  def -(that: Double): Double = el.get() - that
+  /** that - element (reversed operands) */
+  def :-(that: Double): Double = that - el.get()
+  /** element / that */
+  def /(that: Double): Double = el.get() / that
+  /** that / element (reversed operands) */
+  def :/(that: Double): Double = that / el.get()
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
index 2b7773b..8e0c07f 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
@@ -130,7 +130,16 @@ package object scalabindings {
case t: Product => t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray
case t: Vector => Array.tabulate(t.length)(t(_))
case t: Array[Double] => t
- case t: Iterable[Double] => t.toArray
+ case t: Iterable[_] =>
+   // Dispatch on the first element; an empty Iterable yields an empty row, as the old `t.toArray` case did.
+   t.headOption match {
+     case None | Some(_: Double) => t.asInstanceOf[Iterable[Double]].toArray
+     case Some(vv: Vector) =>
+       val m = new DenseMatrix(t.size, vv.length)
+       t.asInstanceOf[Iterable[Vector]].view.zipWithIndex.foreach { case (v, idx) => m(idx, ::) := v }
+       return m
+     case _ => throw new IllegalArgumentException("unsupported type in the inline Matrix initializer")
+   }
case t: Array[Array[Double]] => if (rows.size == 1)
return new DenseMatrix(t)
else
@@ -138,7 +147,9 @@ package object scalabindings {
"double[][] data parameter can be the only argument for dense()")
case t:Array[Vector] =>
val m = new DenseMatrix(t.size,t.head.length)
- t.view.zipWithIndex.foreach({case(v,idx) => m(idx,::) := v})
+ t.view.zipWithIndex.foreach{
+ case(v,idx) => m(idx,::) := v
+ }
return m
case _ => throw new IllegalArgumentException("unsupported type in the inline Matrix initializer")
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
index dbdc934..b68a98e 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala
@@ -219,7 +219,7 @@ object SparkEngine extends DistributedEngine {
new CheckpointedDrmSpark[Long](rdd, nrow, ncol)
}
- private def cacheHint2Spark(cacheHint: CacheHint.CacheHint): StorageLevel = cacheHint match {
+ private[mahout] def cacheHint2Spark(cacheHint: CacheHint.CacheHint): StorageLevel = cacheHint match {
case CacheHint.NONE => StorageLevel.NONE
case CacheHint.DISK_ONLY => StorageLevel.DISK_ONLY
case CacheHint.DISK_ONLY_2 => StorageLevel.DISK_ONLY_2
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala
index d2d5340..32d6fb5 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala
@@ -20,7 +20,7 @@ package org.apache.mahout.sparkbindings
import scala.reflect.ClassTag
/**
- * This package contains distributed algorithms that distributed matrix expression optimizer picks
+ * This package contains distributed algorithms that the distributed matrix expression optimizer picks
* from.
*/
package object blas {
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala
index e9fd7ac..8726766 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala
@@ -52,7 +52,9 @@ package object sparkbindings {
* @param customJars
* @return
*/
- def mahoutSparkContext(masterUrl: String, appName: String,
+ def mahoutSparkContext(
+ masterUrl: String,
+ appName: String,
customJars: TraversableOnce[String] = Nil,
sparkConf: SparkConf = new SparkConf(),
addMahoutJars: Boolean = true
@@ -177,13 +179,14 @@ package object sparkbindings {
def drmWrap[K : ClassTag](
rdd: DrmRdd[K],
nrow: Int = -1,
- ncol: Int = -1
+ ncol: Int = -1,
+ cacheHint:CacheHint.CacheHint = CacheHint.NONE
): CheckpointedDrm[K] =
new CheckpointedDrmSpark[K](
rdd = rdd,
_nrow = nrow,
_ncol = ncol,
- _cacheStorageLevel = StorageLevel.NONE
+ _cacheStorageLevel = SparkEngine.cacheHint2Spark(cacheHint)
)
http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala b/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala
index 6060dfd..03c7190 100644
--- a/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala
@@ -17,8 +17,6 @@
package org.apache.mahout.math.decompositions
-import org.scalatest.{Matchers, FunSuite}
-import org.apache.mahout.sparkbindings.test.MahoutLocalContext
import org.apache.mahout.math._
import drm._
import scalabindings._
@@ -27,6 +25,8 @@ import RLikeDrmOps._
import org.apache.mahout.sparkbindings._
import org.apache.mahout.common.RandomUtils
import scala.math._
+import org.scalatest.{Matchers, FunSuite}
+import org.apache.mahout.sparkbindings.test.MahoutLocalContext
class MathSuite extends FunSuite with Matchers with MahoutLocalContext {
@@ -171,7 +171,7 @@ class MathSuite extends FunSuite with Matchers with MahoutLocalContext {
}
- test("als") {
+ test("dals") {
val rnd = RandomUtils.getRandom
@@ -202,8 +202,8 @@ class MathSuite extends FunSuite with Matchers with MahoutLocalContext {
printf("ALS factorized approximation block:\n%s\n", predict(0 until 3, 0 until 3))
val err = (inCoreA - predict).norm
- printf ("norm of residuals %f\n",err)
- printf ("train iteration rmses: %s\n", rmse)
+ printf("norm of residuals %f\n", err)
+ printf("train iteration rmses: %s\n", rmse)
err should be < 1e-2