You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2015/07/02 01:01:27 UTC
mahout git commit: MAHOUT-1754: Distance and squared distance
matrices routines (dlyubimov)
Repository: mahout
Updated Branches:
refs/heads/mahout-0.10.x 31ec01973 -> 349b94d88
MAHOUT-1754: Distance and squared distance matrices routines (dlyubimov)
MAHOUT-1753: First and second moment routines (dlyubimov)
MAHOUT-1746: mxA ^ 2, mxA ^ 0.5 to mean the same thing as mxA * mxA and mxA ::= sqrt _ (dlyubimov)
This closes apache/mahout#145
Squashed commit of the following:
commit a6fc57810abfdcf854c2e06a4a8aa87e357901a0
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Wed Jun 24 22:49:20 2015 -0700
formula typo fix.
commit 8bd70c043e7486ecf20f26f98094934fb16a51f2
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Wed Jun 24 16:45:44 2015 -0700
Adding comments per public review request
commit 9394ac997f014f3e32439cbdd4e40deb9f03d6c5
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Tue Jun 23 16:02:45 2015 -0700
adding `dist` functions
commit 7c5576ce1536e8873c08e0e35b6fc032b278ed5d
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Tue Jun 23 15:38:28 2015 -0700
un-privatizing some of new functions.
commit 526bfd626fbc398886b1b5dec37c6e2939ea7c4a
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Tue Jun 23 14:40:32 2015 -0700
MAHOUT-1746: a ^ 2 to mean a * a not pow (a, 2.0)
commit 806000a700450b7186f511486ca1ca828225abb3
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Mon Jun 22 18:03:51 2015 -0700
Added distance functions
commit 637e050ed3a52b06e2ce1f691c5dfb6a77074a43
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Mon Jun 22 11:56:38 2015 -0700
First port of mu-variance-covariance functions
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/349b94d8
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/349b94d8
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/349b94d8
Branch: refs/heads/mahout-0.10.x
Commit: 349b94d887c1fb11fd00318717531f5cd25eab57
Parents: 31ec019
Author: Dmitriy Lyubimov <dl...@apache.org>
Authored: Wed Jul 1 16:01:31 2015 -0700
Committer: Dmitriy Lyubimov <dl...@apache.org>
Committed: Wed Jul 1 16:02:20 2015 -0700
----------------------------------------------------------------------
CHANGELOG | 6 +
.../apache/mahout/math/drm/RLikeDrmOps.scala | 10 +-
.../org/apache/mahout/math/drm/package.scala | 161 ++++++++++++++++++-
.../math/scalabindings/RLikeMatrixOps.scala | 11 +-
.../math/scalabindings/RLikeVectorOps.scala | 10 +-
.../mahout/math/scalabindings/package.scala | 92 ++++++++---
.../mahout/math/drm/DrmLikeOpsSuiteBase.scala | 24 +++
.../mahout/math/scalabindings/MathSuite.scala | 39 ++++-
8 files changed, 321 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index dd65b0e..38c7d17 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,12 @@ Mahout Change Log
Release 0.10.2 - unreleased
+ MAHOUT-1754: Distance and squared distance matrices routines (dlyubimov)
+
+ MAHOUT-1753: First and second moment routines (dlyubimov)
+
+ MAHOUT-1746: mxA ^ 2, mxA ^ 0.5 to mean the same thing as mxA * mxA and mxA ::= sqrt _ (dlyubimov)
+
MAHOUT-1660: Hadoop1HDFSUtil.readDRMHEader should be taking Hadoop conf (dlyubimov)
MAHOUT-1713: Performance and parallelization improvements for AB', A'B, A'A spark physical operators (dlyubimov)
http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
index 7927e51..aac7da1 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
@@ -50,7 +50,15 @@ class RLikeDrmOps[K: ClassTag](drm: DrmLike[K]) extends DrmLikeOps[K](drm) {
def *:(that: Double): DrmLike[K] = OpAewUnaryFunc[K](A = this, f = that * _)
- def ^(that: Double): DrmLike[K] = OpAewUnaryFunc[K](A = this, f = math.pow(_, that))
+ def ^(that: Double): DrmLike[K] = that match {
+ // Special handling of x ^ 2 and x ^ 0.5: we want x ^ 2 to be handled consistently with x * x,
+ // since the pow(x, 2) function may return results different from x * x, yet much of the code
+ // uses the two interchangeably. Without this, things like NaN entries may appear on the main
+ // diagonal of a distance matrix.
+ case 2.0 ⇒ OpAewUnaryFunc[K](A = this, f = x ⇒ x * x)
+ case 0.5 ⇒ OpAewUnaryFunc[K](A = this, f = math.sqrt _)
+ case _ ⇒ OpAewUnaryFunc[K](A = this, f = math.pow(_, that))
+ }
def /(that: Double): DrmLike[K] = OpAewUnaryFunc[K](A = this, f = _ / that, evalZeros = that == 0.0)
http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
index d865b58..e972dd8 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
@@ -17,7 +17,7 @@
package org.apache.mahout.math
-import org.apache.mahout.math.drm.DistributedContext
+import org.apache.mahout.math.drm._
import org.apache.mahout.math.indexeddataset.{IndexedDataset, DefaultIndexedDatasetReadSchema, Schema}
import org.apache.mahout.math.scalabindings.RLikeOps._
import org.apache.mahout.math.scalabindings._
@@ -160,6 +160,165 @@ package object drm {
def dsqrt[K: ClassTag](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.sqrt)
def dsignum[K: ClassTag](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.signum)
+
+ ///////////////////////////////////////////////////////////
+ // Misc. math utilities.
+
+ /**
+ * Compute column-wise means and variances -- distributed version.
+ *
+ * Variances are computed per column as mean(x^2) - mean(x)^2: the column means of the input
+ * raised to the power of 2, minus the element-wise product of the column means with themselves.
+ *
+ * @param drmA Note: will pin input to cache if not yet pinned.
+ * @tparam K key type of the input DRM
+ * @return colMeans → colVariances
+ */
+ def dcolMeanVars[K: ClassTag](drmA: DrmLike[K]): (Vector, Vector) = {
+
+ import RLikeDrmOps._
+
+ val drmAcp = drmA.checkpoint()
+
+ val mu = drmAcp colMeans
+
+ // Compute variance using mean(x^2) - mean(x)^2 (presumably in place into the temporary mu * mu, leaving mu intact -- confirm)
+ val variances = (drmAcp ^ 2 colMeans) -=: mu * mu
+
+ mu → variances
+ }
+
+ /**
+ * Compute column wise means and standard deviations -- distributed version.
+ *
+ * Standard deviations are the square roots of the variances returned by dcolMeanVars. Those are
+ * computed as mean(x^2) - mean(x)^2, so rounding may leave a tiny negative variance for a
+ * (near-)constant column; such values are clamped to 0 before taking the square root so that
+ * rounding noise does not turn into NaN.
+ *
+ * @param drmA note: input will be pinned to cache if not yet pinned
+ * @tparam K key type of the input DRM
+ * @return colMeans → colStdevs
+ */
+ def dcolMeanStdevs[K: ClassTag](drmA: DrmLike[K]): (Vector, Vector) = {
+ val (mu, vars) = dcolMeanVars(drmA)
+ mu → (vars ::= { (x: Double) ⇒ math.sqrt(math.max(x, 0.0)) })
+ }
+
+ /**
+ * Thin column-wise mean and covariance matrix computation. Same as [[dcolMeanCov()]] but suited for
+ * thin and tall inputs where covariance matrix can be reduced and finalized in driver memory.
+ *
+ * The covariance is computed as (A'A) / A.nrow - (mu cross mu), where A'A is collected first
+ * and the remaining steps are applied to the collected matrix.
+ *
+ * @param drmA note: will pin input to cache if not yet pinned.
+ * @tparam K key type of the input DRM
+ * @return mean → covariance matrix (in core)
+ */
+ def dcolMeanCovThin[K: ClassTag](drmA: DrmLike[K]):(Vector, Matrix) = {
+
+ import RLikeDrmOps._
+
+ val drmAcp = drmA.checkpoint()
+ val mu = drmAcp colMeans
+ val mxCov = (drmAcp.t %*% drmAcp).collect /= drmAcp.nrow -= (mu cross mu)
+ mu → mxCov
+ }
+
+ /**
+ * Compute COV(X) matrix and mean of row-wise data set. X is presented as row-wise input matrix A.
+ *
+ * This is a "wide" procedure, covariance matrix is returned as a DRM.
+ *
+ * @param drmA note: will pin input into cache if not yet pinned.
+ * @tparam K key type of the input DRM
+ * @return mean → covariance DRM
+ */
+ def dcolMeanCov[K: ClassTag](drmA: DrmLike[K]): (Vector, DrmLike[Int]) = {
+
+ import RLikeDrmOps._
+
+ implicit val ctx = drmA.context
+ val drmAcp = drmA.checkpoint()
+
+ val bcastMu = drmBroadcast(drmAcp colMeans)
+
+ // We use multivariate analogue COV(X)=E(XX')-mu*mu'. In our case E(XX') = (A'A)/A.nrow.
+ // Compute E(XX')
+ val drmSigma = (drmAcp.t %*% drmAcp / drmAcp.nrow)
+
+ // Subtract mu*mu'. In this case we assume mu*mu' may still be too big to be handled by the
+ // driver alone, so we distribute this operation as well. Hence it may look a bit cryptic.
+ .mapBlock() { case (keys, block) ⇒
+
+ // Pin mu as vector reference to memory.
+ val mu:Vector = bcastMu
+
+ keys → (block := { (r, c, v) ⇒ v - mu(keys(r)) * mu(c) })
+ }
+
+ // return (mu, cov(X) ("bigSigma")).
+ (bcastMu: Vector) → drmSigma
+ }
+
+ /**
+ * Distributed squared distance matrix computation: pairwise squared distances between the rows
+ * of X. Entry (i, j) is computed block-wise as s(i) + s(j) - 2 * (XX')(i, j), where s is the
+ * vector of row sums of X ^ 2.
+ *
+ * @param drmX row-wise dataset. Pinned to cache if not yet pinned.
+ * @return matrix of pairwise squared distances between the rows of X
+ */
+ def dsqDist(drmX: DrmLike[Int]): DrmLike[Int] = {
+
+ // This is a specific case of pairwise distances of X and Y.
+
+ import RLikeDrmOps._
+
+ // Context needed
+ implicit val ctx = drmX.context
+
+ // Pin to cache if hasn't been pinned yet
+ val drmXcp = drmX.checkpoint()
+
+ // Compute row sums of squares
+ val s = drmXcp ^ 2 rowSums
+
+ val sBcast = drmBroadcast(s)
+
+ (drmXcp %*% drmXcp.t)
+
+ // Apply second part of the formula as per in-core algorithm
+ .mapBlock() { case (keys, block) ⇒
+
+ // Slurp broadcast to memory
+ val s = sBcast: Vector
+
+ // Update in-place
+ block := { (r, c, x) ⇒ s(keys(r)) + s(c) - 2 * x}
+
+ keys → block
+ }
+ }
+
+
+ /**
+ * Compute fold-in distances (distributed version). Here, we use pretty much the same math as with
+ * squared distances.
+ *
+ * D_sq = s*1' + 1*t' - 2*X*Y'
+ *
+ * where s is row sums of Hadamard product(X, X), and, similarly,
+ * t is row sums of Hadamard product(Y, Y).
+ *
+ * @param drmX m x d row-wise dataset. Pinned to cache if not yet pinned.
+ * @param drmY n x d row-wise dataset. Pinned to cache if not yet pinned.
+ * @return m x n pairwise squared distance matrix (between rows of X and Y)
+ */
+ def dsqDist(drmX: DrmLike[Int], drmY: DrmLike[Int]): DrmLike[Int] = {
+
+ import RLikeDrmOps._
+
+ implicit val ctx = drmX.context
+
+ val drmXcp = drmX.checkpoint()
+ val drmYcp = drmY.checkpoint()
+
+ val sBcast = drmBroadcast(drmXcp ^ 2 rowSums)
+ val tBcast = drmBroadcast(drmYcp ^ 2 rowSums)
+
+ // Multiply the checkpointed (pinned) inputs rather than the original operands, so that the
+ // pipelines behind X and Y are not evaluated a second time.
+ (drmXcp %*% drmYcp.t)
+
+ // Apply the rest of the formula
+ .mapBlock() { case (keys, block) ⇒
+
+ // Cache broadcast representations in local task variable
+ val s = sBcast: Vector
+ val t = tBcast: Vector
+
+ block := { (r, c, x) ⇒ s(keys(r)) + t(c) - 2 * x}
+ keys → block
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
index 7091c53..e994e31 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
@@ -108,8 +108,15 @@ class RLikeMatrixOps(m: Matrix) extends MatrixOps(m) {
}
def ^=(that: Double) = {
- m ::= { x ⇒ math.pow(x, that) }
- m
+ that match {
+ // Special handling of x ^ 2 and x ^ 0.5: we want x ^ 2 to be handled consistently with x * x,
+ // since the pow(x, 2) function may return results different from x * x, yet much of the code
+ // uses the two interchangeably. Without this, things like NaN entries may appear on the main
+ // diagonal of a distance matrix.
+ case 2.0 ⇒ m ::= { x ⇒ x * x }
+ case 0.5 ⇒ m ::= math.sqrt _
+ case _ ⇒ m ::= { x ⇒ math.pow(x, that) }
+ }
}
def ^(that: Double) = m.cloned ^= that
http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
index 38a55d6..bf1bb30 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
@@ -67,7 +67,15 @@ class RLikeVectorOps(_v: Vector) extends VectorOps(_v) {
/** Elementwise right-associative / */
def /:(that: Vector) = that.cloned /= v
- def ^=(that: Double) = v.assign(Functions.POW, that)
+ def ^=(that: Double) = that match {
+ // Special handling of x ^ 2 and x ^ 0.5: we want x ^ 2 to be handled consistently with x * x,
+ // since the pow(x, 2) function may return results different from x * x, yet much of the code
+ // uses the two interchangeably. Without this, things like NaN entries may appear on the main
+ // diagonal of a distance matrix.
+ case 2.0 ⇒ v.assign(Functions.SQUARE)
+ case 0.5 ⇒ v.assign(Functions.SQRT)
+ case _ ⇒ v.assign (Functions.POW, that)
+ }
def ^=(that: Vector) = v.assign(that, Functions.POW)
http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
index 20dc9cd..7ff09bf 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
@@ -125,34 +125,34 @@ package object scalabindings {
*/
def dense[R](rows: R*): DenseMatrix = {
import RLikeOps._
- val data = for (r <- rows) yield {
+ val data = for (r ← rows) yield {
r match {
- case n: Number => Array(n.doubleValue())
- case t: Vector => Array.tabulate(t.length)(t(_))
- case t: Array[Double] => t
- case t: Iterable[_] =>
+ case n: Number ⇒ Array(n.doubleValue())
+ case t: Vector ⇒ Array.tabulate(t.length)(t(_))
+ case t: Array[Double] ⇒ t
+ case t: Iterable[_] ⇒
t.head match {
- case ss: Double => t.asInstanceOf[Iterable[Double]].toArray
- case vv: Vector =>
+ case ss: Double ⇒ t.asInstanceOf[Iterable[Double]].toArray
+ case vv: Vector ⇒
val m = new DenseMatrix(t.size, t.head.asInstanceOf[Vector].length)
t.asInstanceOf[Iterable[Vector]].view.zipWithIndex.foreach {
- case (v, idx) => m(idx, ::) := v
+ case (v, idx) ⇒ m(idx, ::) := v
}
return m
}
- case t: Product => t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray
- case t: Array[Array[Double]] => if (rows.size == 1)
+ case t: Product ⇒ t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray
+ case t: Array[Array[Double]] ⇒ if (rows.size == 1)
return new DenseMatrix(t)
else
throw new IllegalArgumentException(
"double[][] data parameter can be the only argument for dense()")
- case t: Array[Vector] =>
+ case t: Array[Vector] ⇒
val m = new DenseMatrix(t.size, t.head.length)
t.view.zipWithIndex.foreach {
- case (v, idx) => m(idx, ::) := v
+ case (v, idx) ⇒ m(idx, ::) := v
}
return m
- case _ => throw new IllegalArgumentException("unsupported type in the inline Matrix initializer")
+ case _ ⇒ throw new IllegalArgumentException("unsupported type in the inline Matrix initializer")
}
}
new DenseMatrix(data.toArray)
@@ -179,7 +179,7 @@ package object scalabindings {
val nrow = rows.size
val ncol = rows.map(_.size()).max
val m = new SparseRowMatrix(nrow, ncol)
- m := rows.map { row =>
+ m := rows.map { row ⇒
if (row.length < ncol) {
val newRow = row.like(ncol)
newRow(0 until row.length) := row
@@ -200,7 +200,7 @@ package object scalabindings {
val cardinality = if (sdata.size > 0) sdata.map(_._1).max + 1 else 0
val initialCapacity = sdata.size
val sv = new RandomAccessSparseVector(cardinality, initialCapacity)
- sdata.foreach(t => sv.setQuick(t._1, t._2.asInstanceOf[Number].doubleValue()))
+ sdata.foreach(t ⇒ sv.setQuick(t._1, t._2.asInstanceOf[Number].doubleValue()))
sv
}
@@ -337,12 +337,64 @@ package object scalabindings {
/** Matrix-matrix unary func */
- type MMUnaryFunc = (Matrix, Option[Matrix]) => Matrix
+ type MMUnaryFunc = (Matrix, Option[Matrix]) ⇒ Matrix
/** Binary matrix-matrix operations which may save result in-place, optionally */
- type MMBinaryFunc = (Matrix, Matrix, Option[Matrix]) => Matrix
- type MVBinaryFunc = (Matrix, Vector, Option[Matrix]) => Matrix
- type VMBinaryFunc = (Vector, Matrix, Option[Matrix]) => Matrix
- type MDBinaryFunc = (Matrix, Double, Option[Matrix]) => Matrix
+ type MMBinaryFunc = (Matrix, Matrix, Option[Matrix]) ⇒ Matrix
+ type MVBinaryFunc = (Matrix, Vector, Option[Matrix]) ⇒ Matrix
+ type VMBinaryFunc = (Vector, Matrix, Option[Matrix]) ⇒ Matrix
+ type MDBinaryFunc = (Matrix, Double, Option[Matrix]) ⇒ Matrix
+ /////////////////////////////////////
+ // Miscellaneous in-core utilities
+
+ /**
+ * Compute column-wise means and variances.
+ *
+ * Variances are computed per column as mean(x^2) - mean(x)^2: the column means of mxA * mxA
+ * minus the column means raised to the power of 2.
+ *
+ * @param mxA input
+ * @return colMeans → colVariances
+ */
+ def colMeanVars(mxA:Matrix): (Vector, Vector) = {
+ val mu = mxA.colMeans()
+ val variance = (mxA * mxA colMeans) -= mu ^ 2
+ mu → variance
+ }
+
+ /**
+ * Compute column-wise means and stdevs.
+ *
+ * Standard deviations are the square roots of the variances returned by colMeanVars. Those are
+ * computed as mean(x^2) - mean(x)^2, so rounding may leave a tiny negative variance for a
+ * (near-)constant column; such values are clamped to 0 before taking the square root so that
+ * rounding noise does not turn into NaN.
+ *
+ * @param mxA input
+ * @return colMeans → colStdevs
+ */
+ def colMeanStdevs(mxA:Matrix): (Vector, Vector) = {
+ val (mu, variance) = colMeanVars(mxA)
+ mu → (variance ::= { (x: Double) ⇒ math.sqrt(math.max(x, 0.0)) })
+ }
+
+ /**
+ * Compute square distance matrix. We assume data points are row-wise, similar to R's dist().
+ *
+ * Entry (r, c) is computed as s(r) + s(c) - 2 * (XX')(r, c), where s is row sums of X ^ 2.
+ *
+ * @param mxX X, row-wise data points
+ * @return pairwise squared distances between the rows of X
+ */
+ def sqDist(mxX: Matrix): Matrix = {
+
+ val s = mxX ^ 2 rowSums
+
+ (mxX %*% mxX.t) := { (r, c, x) ⇒ s(r) + s(c) - 2 * x}
+ }
+
+ /**
+ * Pairwise squared distance computation.
+ * @param mxX X, m x d
+ * @param mxY Y, n x d
+ * @return pairwise squared distances of row-wise data points in X and Y (m x n)
+ */
+ def sqDist(mxX: Matrix, mxY: Matrix): Matrix = {
+
+ val s = mxX ^ 2 rowSums
+
+ val t = mxY ^ 2 rowSums
+
+ // D = s*1' + 1*t' - 2XY'
+ (mxX %*% mxY.t) := { (r, c, d) ⇒ s(r) + t(c) - 2.0 * d}
+ }
+
+ /**
+ * Compute distance matrix of row-wise data points in X: element-wise square root of sqDist(X).
+ * Squared distances that come out slightly negative due to rounding are clamped to 0 first, so
+ * that rounding noise does not turn into NaN.
+ *
+ * @param mxX X, row-wise data points
+ * @return pairwise distances between the rows of X
+ */
+ def dist(mxX: Matrix): Matrix = sqDist(mxX) := { (x: Double) ⇒ sqrt(math.max(x, 0.0)) }
+
+ /**
+ * Compute pairwise distances of row-wise data points in X and Y: element-wise square root of
+ * sqDist(X, Y). Squared distances that come out slightly negative due to rounding are clamped
+ * to 0 first, so that rounding noise does not turn into NaN.
+ *
+ * @param mxX X, m x d
+ * @param mxY Y, n x d
+ * @return pairwise distances of row-wise data points in X and Y (m x n)
+ */
+ def dist(mxX: Matrix, mxY: Matrix): Matrix =
+ sqDist(mxX, mxY) := { (x: Double) ⇒ sqrt(math.max(x, 0.0)) }
+
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala
index bb42121..fdfb3f9 100644
--- a/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala
+++ b/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala
@@ -110,4 +110,28 @@ trait DrmLikeOpsSuiteBase extends DistributedMahoutSuite with Matchers {
}
+ test("dsqDist(X,Y)") {
+ val m = 100
+ val n = 300
+ val d = 7
+ val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5
+ val mxY = Matrices.symmetricUniformView(n, d, 1234).cloned += 10
+ val (drmX, drmY) = (drmParallelize(mxX, 3), drmParallelize(mxY, 4))
+
+ val mxDsq = dsqDist(drmX, drmY).collect
+ val mxDsqControl = new DenseMatrix(m, n) := { (r, c, _) ⇒ (mxX(r, ::) - mxY(c, ::)) ^= 2 sum }
+ (mxDsq - mxDsqControl).norm should be < 1e-7
+ }
+
+ test("dsqDist(X)") {
+ val m = 100
+ val d = 7
+ val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5
+ val drmX = drmParallelize(mxX, 3)
+
+ val mxDsq = dsqDist(drmX).collect
+ // Control is computed from the in-core input itself, so that it depends neither on the
+ // distributed path under test nor on a DRM-to-in-core conversion.
+ val mxDsqControl = sqDist(mxX)
+ (mxDsq - mxDsqControl).norm should be < 1e-7
+ }
+
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala b/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala
index b10cde3..bcfe109 100644
--- a/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala
+++ b/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala
@@ -17,6 +17,7 @@
package org.apache.mahout.math.scalabindings
+import org.apache.mahout.logging._
import org.scalatest.{Matchers, FunSuite}
import org.apache.mahout.math._
import scala.math._
@@ -28,6 +29,8 @@ import org.apache.mahout.common.RandomUtils
class MathSuite extends FunSuite with MahoutSuite {
+ private final implicit val log = getLog(classOf[MathSuite])
+
test("chol") {
// try to solve Ax=b with cholesky:
@@ -41,26 +44,26 @@ class MathSuite extends FunSuite with MahoutSuite {
// make sure it is symmetric for a valid solution
a := a.t %*% a
- printf("A= \n%s\n", a)
+ trace(s"A= \n$a")
val b = dense((9, 8, 7)).t
- printf("b = \n%s\n", b)
+ trace(s"b = \n$b")
- // fails if chol(a,true)
+ // Fails if chol(a, true)
val ch = chol(a)
- printf("L = \n%s\n", ch.getL)
+ trace(s"L = \n${ch.getL}")
- printf("(L^-1)b =\n%s\n", ch.solveLeft(b))
+ trace(s"(L^-1)b =\n${ch.solveLeft(b)}\n")
val x = ch.solveRight(eye(3)) %*% ch.solveLeft(b)
- printf("x = \n%s\n", x.toString)
+ trace(s"x = \n$x")
val axmb = (a %*% x) - b
- printf("AX - B = \n%s\n", axmb.toString)
+ trace(s"AX - B = \n$axmb")
axmb.norm should be < 1e-10
@@ -211,4 +214,26 @@ class MathSuite extends FunSuite with MahoutSuite {
}
+ test("sqDist(X,Y)") {
+ val m = 100
+ val n = 300
+ val d = 7
+ val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5
+ val mxY = Matrices.symmetricUniformView(n, d, 1234).cloned += 10
+
+ val mxDsq = sqDist(mxX, mxY)
+ val mxDsqControl = new DenseMatrix(m, n) := { (r, c, _) ⇒ (mxX(r, ::) - mxY(c, ::)) ^= 2 sum }
+ (mxDsq - mxDsqControl).norm should be < 1e-7
+ }
+
+ test("sqDist(X)") {
+ val m = 100
+ val d = 7
+ val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5
+
+ val mxDsq = sqDist(mxX)
+ val mxDsqControl = sqDist(mxX, mxX)
+ (mxDsq - mxDsqControl).norm should be < 1e-7
+ }
+
}