You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2014/08/01 03:08:03 UTC
git commit: NOJIRA: more parameter naming conventions (style)
Repository: mahout
Updated Branches:
refs/heads/master 56a2305ed -> 66f164057
NOJIRA: more parameter naming conventions (style)
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/66f16405
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/66f16405
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/66f16405
Branch: refs/heads/master
Commit: 66f164057e322d2e63ea02c35c9e30c3969e80b1
Parents: 56a2305
Author: Dmitriy Lyubimov <dl...@apache.org>
Authored: Thu Jul 31 18:06:27 2014 -0700
Committer: Dmitriy Lyubimov <dl...@apache.org>
Committed: Thu Jul 31 18:06:27 2014 -0700
----------------------------------------------------------------------
.../apache/mahout/math/decompositions/ALS.scala | 7 ++---
.../apache/mahout/math/decompositions/DQR.scala | 10 +++---
.../mahout/math/decompositions/DSPCA.scala | 22 +++++++-------
.../mahout/math/decompositions/DSSVD.scala | 18 +++++------
.../mahout/math/decompositions/package.scala | 32 ++++++++++++--------
.../DistributedDecompositionsSuiteBase.scala | 4 +--
6 files changed, 50 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/66f16405/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
index 5aed649..4e2f45c 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala
@@ -65,7 +65,7 @@ private[math] object ALS {
* whichever earlier.
* <P>
*
- * @param drmInput The input matrix
+ * @param drmA The input matrix
* @param k required rank of decomposition (number of cols in U and V results)
* @param convergenceThreshold stop sooner if (rmse[i-1] - rmse[i])/rmse[i - 1] is less than this
* value. If <=0 then we won't compute RMSE and use convergence test.
@@ -75,7 +75,7 @@ private[math] object ALS {
* @return { @link org.apache.mahout.math.drm.decompositions.ALS.Result}
*/
def dals[K: ClassTag](
- drmInput: DrmLike[K],
+ drmA: DrmLike[K],
k: Int = 50,
lambda: Double = 0.0,
maxIterations: Int = 10,
@@ -85,8 +85,7 @@ private[math] object ALS {
assert(convergenceThreshold < 1.0, "convergenceThreshold")
assert(maxIterations >= 1, "maxIterations")
- val drmA = drmInput
- val drmAt = drmInput.t
+ val drmAt = drmA.t
// Initialize U and V so that they are identically distributed to A or A'
var drmU = drmA.mapBlock(ncol = k) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/66f16405/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DQR.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DQR.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DQR.scala
index 4ca99b1..7caa3dd 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DQR.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DQR.scala
@@ -38,14 +38,14 @@ object DQR {
* It also guarantees that Q is partitioned exactly the same way (and in same key-order) as A, so
* their RDD should be able to zip successfully.
*/
- def dqrThin[K: ClassTag](A: DrmLike[K], checkRankDeficiency: Boolean = true): (DrmLike[K], Matrix) = {
+ def dqrThin[K: ClassTag](drmA: DrmLike[K], checkRankDeficiency: Boolean = true): (DrmLike[K], Matrix) = {
- if (A.ncol > 5000)
+ if (drmA.ncol > 5000)
log.warn("A is too fat. A'A must fit in memory and easily broadcasted.")
- implicit val ctx = A.context
+ implicit val ctx = drmA.context
- val AtA = (A.t %*% A).checkpoint()
+ val AtA = (drmA.t %*% drmA).checkpoint()
val inCoreAtA = AtA.collect
if (log.isDebugEnabled) log.debug("A'A=\n%s\n".format(inCoreAtA))
@@ -64,7 +64,7 @@ object DQR {
// decompose A'A in the backend again.
// Compute Q = A*inv(L') -- we can do it blockwise.
- val Q = A.mapBlock() {
+ val Q = drmA.mapBlock() {
case (keys, block) => keys -> chol(bcastAtA).solveRight(block)
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/66f16405/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSPCA.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSPCA.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSPCA.scala
index 37c218a..de7402d 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSPCA.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSPCA.scala
@@ -31,21 +31,21 @@ object DSPCA {
* Distributed Stochastic PCA decomposition algorithm. A logical reflow of the "SSVD-PCA options.pdf"
* document of the MAHOUT-817.
*
- * @param A input matrix A
+ * @param drmA input matrix A
* @param k request SSVD rank
* @param p oversampling parameter
* @param q number of power iterations (hint: use either 0 or 1)
* @return (U,V,s). Note that U, V are non-checkpointed matrices (i.e. one needs to actually use them,
* e.g. save them to hdfs, in order to trigger their computation).
*/
- def dspca[K: ClassTag](A: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
+ def dspca[K: ClassTag](drmA: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
(DrmLike[K], DrmLike[Int], Vector) = {
- val drmA = A.checkpoint()
- implicit val ctx = A.context
+ val drmAcp = drmA.checkpoint()
+ implicit val ctx = drmAcp.context
- val m = drmA.nrow
- val n = drmA.ncol
+ val m = drmAcp.nrow
+ val n = drmAcp.ncol
assert(k <= (m min n), "k cannot be greater than smaller of m, n.")
val pfxed = safeToNonNegInt((m min n) - k min p)
@@ -53,7 +53,7 @@ object DSPCA {
val r = k + pfxed
// Dataset mean
- val xi = drmA.colMeans
+ val xi = drmAcp.colMeans
// We represent Omega by its seed.
val omegaSeed = RandomUtils.getRandom().nextInt()
@@ -67,7 +67,7 @@ object DSPCA {
val bcastS_o = drmBroadcast(s_o)
val bcastXi = drmBroadcast(xi)
- var drmY = drmA.mapBlock(ncol = r) {
+ var drmY = drmAcp.mapBlock(ncol = r) {
case (keys, blockA) =>
val s_o:Vector = bcastS_o
val blockY = blockA %*% Matrices.symmetricUniformView(n, r, omegaSeed)
@@ -84,7 +84,7 @@ object DSPCA {
// This actually should be optimized as identically partitioned map-side A'B since A and Q should
// still be identically partitioned.
- var drmBt = (drmA.t %*% drmQ).checkpoint()
+ var drmBt = (drmAcp.t %*% drmQ).checkpoint()
var s_b = (drmBt.t %*% xi).collect(::, 0)
var bcastVarS_b = drmBroadcast(s_b)
@@ -112,7 +112,7 @@ object DSPCA {
drmY.uncache()
drmQ.uncache()
- drmY = (drmA %*% drmBt)
+ drmY = (drmAcp %*% drmBt)
// Fix Y by subtracting s_b from each row of the AB'
.mapBlock() {
case (keys, block) =>
@@ -130,7 +130,7 @@ object DSPCA {
// This on the other hand should be inner-join-and-map A'B optimization since A and Q_i are not
// identically partitioned anymore.
- drmBt = (drmA.t %*% drmQ).checkpoint()
+ drmBt = (drmAcp.t %*% drmQ).checkpoint()
s_b = (drmBt.t %*% xi).collect(::, 0)
bcastVarS_b = drmBroadcast(s_b)
http://git-wip-us.apache.org/repos/asf/mahout/blob/66f16405/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSSVD.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSSVD.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSSVD.scala
index a158390..1abfb87 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSSVD.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/DSSVD.scala
@@ -13,20 +13,20 @@ object DSSVD {
/**
* Distributed Stochastic Singular Value decomposition algorithm.
*
- * @param A input matrix A
+ * @param drmA input matrix A
* @param k request SSVD rank
* @param p oversampling parameter
* @param q number of power iterations
* @return (U,V,s). Note that U, V are non-checkpointed matrices (i.e. one needs to actually use them,
* e.g. save them to hdfs, in order to trigger their computation).
*/
- def dssvd[K: ClassTag](A: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
+ def dssvd[K: ClassTag](drmA: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
(DrmLike[K], DrmLike[Int], Vector) = {
- val drmA = A.checkpoint()
+ val drmAcp = drmA.checkpoint()
- val m = drmA.nrow
- val n = drmA.ncol
+ val m = drmAcp.nrow
+ val n = drmAcp.ncol
assert(k <= (m min n), "k cannot be greater than smaller of m, n.")
val pfxed = safeToNonNegInt((m min n) - k min p)
@@ -39,7 +39,7 @@ object DSSVD {
// Compute Y = A*Omega. Instead of redistributing view, we redistribute the Omega seed only and
// instantiate the Omega random matrix view in the backend instead. That way serialized closure
// is much more compact.
- var drmY = drmA.mapBlock(ncol = r) {
+ var drmY = drmAcp.mapBlock(ncol = r) {
case (keys, blockA) =>
val blockY = blockA %*% Matrices.symmetricUniformView(n, r, omegaSeed)
keys -> blockY
@@ -51,19 +51,19 @@ object DSSVD {
// This actually should be optimized as identically partitioned map-side A'B since A and Q should
// still be identically partitioned.
- var drmBt = drmA.t %*% drmQ
+ var drmBt = drmAcp.t %*% drmQ
// Checkpoint B' if last iteration
if (q == 0) drmBt = drmBt.checkpoint()
for (i <- 0 until q) {
- drmY = drmA %*% drmBt
+ drmY = drmAcp %*% drmBt
drmQ = dqrThin(drmY.checkpoint())._1
// Checkpoint Q if last iteration
if (i == q - 1) drmQ = drmQ.checkpoint()
// This on the other hand should be inner-join-and-map A'B optimization since A and Q_i are not
// identically partitioned anymore.
- drmBt = drmA.t %*% drmQ
+ drmBt = drmAcp.t %*% drmQ
// Checkpoint B' if last iteration
if (i == q - 1) drmBt = drmBt.checkpoint()
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/66f16405/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
index a3a8787..a7b829f 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala
@@ -28,6 +28,15 @@ package object decompositions {
// ================ In-core decompositions ===================
+ /**
+ * In-core SSVD algorithm.
+ *
+ * @param a input matrix A
+ * @param k request SSVD rank
+ * @param p oversampling parameter
+ * @param q number of power iterations
+ * @return (U,V,s)
+ */
def ssvd(a: Matrix, k: Int, p: Int = 15, q: Int = 0) = SSVD.ssvd(a, k, p, q)
/**
@@ -50,7 +59,6 @@ package object decompositions {
def spca(a: Matrix, k: Int, p: Int = 15, q: Int = 0) =
SSVD.spca(a = a, k = k, p = p, q = q)
-
// ============== Distributed decompositions ===================
/**
@@ -62,35 +70,35 @@ package object decompositions {
* It also guarantees that Q is partitioned exactly the same way (and in same key-order) as A, so
* their RDD should be able to zip successfully.
*/
- def dqrThin[K: ClassTag](A: DrmLike[K], checkRankDeficiency: Boolean = true): (DrmLike[K], Matrix) =
- DQR.dqrThin(A, checkRankDeficiency)
+ def dqrThin[K: ClassTag](drmA: DrmLike[K], checkRankDeficiency: Boolean = true): (DrmLike[K], Matrix) =
+ DQR.dqrThin(drmA, checkRankDeficiency)
/**
* Distributed Stochastic Singular Value decomposition algorithm.
*
- * @param A input matrix A
+ * @param drmA input matrix A
* @param k request SSVD rank
* @param p oversampling parameter
* @param q number of power iterations
* @return (U,V,s). Note that U, V are non-checkpointed matrices (i.e. one needs to actually use them,
* e.g. save them to hdfs, in order to trigger their computation).
*/
- def dssvd[K: ClassTag](A: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
- (DrmLike[K], DrmLike[Int], Vector) = DSSVD.dssvd(A, k, p, q)
+ def dssvd[K: ClassTag](drmA: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
+ (DrmLike[K], DrmLike[Int], Vector) = DSSVD.dssvd(drmA, k, p, q)
/**
* Distributed Stochastic PCA decomposition algorithm. A logical reflow of the "SSVD-PCA options.pdf"
* document of the MAHOUT-817.
*
- * @param A input matrix A
+ * @param drmA input matrix A
* @param k request SSVD rank
* @param p oversampling parameter
* @param q number of power iterations (hint: use either 0 or 1)
* @return (U,V,s). Note that U, V are non-checkpointed matrices (i.e. one needs to actually use them,
* e.g. save them to hdfs, in order to trigger their computation).
*/
- def dspca[K: ClassTag](A: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
- (DrmLike[K], DrmLike[Int], Vector) = DSPCA.dspca(A, k, p, q)
+ def dspca[K: ClassTag](drmA: DrmLike[K], k: Int, p: Int = 15, q: Int = 0):
+ (DrmLike[K], DrmLike[Int], Vector) = DSPCA.dspca(drmA, k, p, q)
/** Result for distributed ALS-type two-component factorization algorithms */
type FactorizationResult[K] = ALS.Result[K]
@@ -112,7 +120,7 @@ package object decompositions {
* whichever earlier.
* <P>
*
- * @param drmInput The input matrix
+ * @param drmA The input matrix
* @param k required rank of decomposition (number of cols in U and V results)
* @param convergenceThreshold stop sooner if (rmse[i-1] - rmse[i])/rmse[i - 1] is less than this
* value. If <=0 then we won't compute RMSE and use convergence test.
@@ -122,12 +130,12 @@ package object decompositions {
* @return { @link org.apache.mahout.math.drm.decompositions.ALS.Result}
*/
def dals[K: ClassTag](
- drmInput: DrmLike[K],
+ drmA: DrmLike[K],
k: Int = 50,
lambda: Double = 0.0,
maxIterations: Int = 10,
convergenceThreshold: Double = 0.10
): FactorizationResult[K] =
- ALS.dals(drmInput, k, lambda, maxIterations, convergenceThreshold)
+ ALS.dals(drmA, k, lambda, maxIterations, convergenceThreshold)
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/66f16405/math-scala/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuiteBase.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuiteBase.scala
index d37ab17..740f6fc 100644
--- a/math-scala/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuiteBase.scala
+++ b/math-scala/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuiteBase.scala
@@ -158,7 +158,7 @@ trait DistributedDecompositionsSuiteBase extends DistributedMahoutSuite with Mat
val k = 10
// Calculate just first 10 principal factors and reduce dimensionality.
- var (drmPCA, _, s) = dspca(A = drmInput, k = 10, p = spectrumLen, q = 1)
+ var (drmPCA, _, s) = dspca(drmA = drmInput, k = 10, p = spectrumLen, q = 1)
// Un-normalized pca data:
drmPCA = drmPCA %*% diagv(s)
@@ -199,7 +199,7 @@ trait DistributedDecompositionsSuiteBase extends DistributedMahoutSuite with Mat
val drmA = drmParallelize(inCoreA, numPartitions = 2)
// Decompose using ALS
- val (drmU, drmV, rmse) = dals(drmInput = drmA, k = 20).toTuple
+ val (drmU, drmV, rmse) = dals(drmA = drmA, k = 20).toTuple
val inCoreU = drmU.collect
val inCoreV = drmV.collect