You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2014/08/15 21:09:39 UTC
git commit: MAHOUT-1606 - Add rowSums,
rowMeans and diagonal extraction operations to distributed matrices
this closes apache/mahout#43
Repository: mahout
Updated Branches:
refs/heads/master 4bac5c295 -> 25c9ed0eb
MAHOUT-1606 - Add rowSums, rowMeans and diagonal extraction operations to distributed matrices
this closes apache/mahout#43
Squashed commit of the following:
commit cd819a5df79e43d558d450ef75c76ae47fcd525b
Merge: fd54249 4bac5c2
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Fri Aug 15 12:07:21 2014 -0700
Merge branch 'master' into rowmeans
commit fd54249c20a5c89c96060b6d96e49ada579d8144
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Tue Aug 12 16:15:32 2014 -0700
rowSums, rowMeans and diagonal extraction for distributed matrices
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/25c9ed0e
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/25c9ed0e
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/25c9ed0e
Branch: refs/heads/master
Commit: 25c9ed0ebeb34a3ef43a48fa47ea0340a049aac0
Parents: 4bac5c2
Author: Dmitriy Lyubimov <dl...@apache.org>
Authored: Fri Aug 15 12:08:33 2014 -0700
Committer: Dmitriy Lyubimov <dl...@apache.org>
Committed: Fri Aug 15 12:08:33 2014 -0700
----------------------------------------------------------------------
CHANGELOG | 2 ++
.../apache/mahout/math/drm/RLikeDrmOps.scala | 33 ++++++++++++++++++++
.../mahout/math/drm/RLikeDrmOpsSuiteBase.scala | 23 ++++++++++++++
3 files changed, 58 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/25c9ed0e/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index aefb838..a0d8507 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
Release 1.0 - unreleased
+ MAHOUT-1606 - Add rowSums, rowMeans and diagonal extraction operations to distributed matrices (dlyubimov)
+
MAHOUT-1603: Tweaks for Spark 1.0.x (dlyubimov & pferrel)
MAHOUT-1596: implement rbind() operator (Anand Avati and dlyubimov)
http://git-wip-us.apache.org/repos/asf/mahout/blob/25c9ed0e/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
index ae5da71..15a0975 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
@@ -68,12 +68,45 @@ class RLikeDrmOps[K: ClassTag](drm: DrmLike[K]) extends DrmLikeOps[K](drm) {
class RLikeDrmIntOps(drm: DrmLike[Int]) extends RLikeDrmOps[Int](drm) {
+ import org.apache.mahout.math._
+ import scalabindings._
+ import RLikeOps._
+ import RLikeDrmOps._
+ import scala.collection.JavaConversions._
+
override def t: DrmLike[Int] = OpAt(A = drm)
def %*%:[K: ClassTag](that: DrmLike[K]): DrmLike[K] = OpAB[K](A = that, B = this.drm)
def %*%:(that: Matrix): DrmLike[Int] = OpTimesLeftMatrix(left = that, A = this.drm)
+ /** Row sums. This is of course applicable to Int-keyed distributed matrices only. */
+ def rowSums(): Vector = {
+ drm.mapBlock(ncol = 1) { case (keys, block) =>
+ // Collect block-wise rowsums and output them as one-column matrix.
+ keys -> dense(block.rowSums).t
+ }
+ .collect(::, 0)
+ }
+
+ /** Row means */
+ def rowMeans(): Vector = {
+ drm.mapBlock(ncol = 1) { case (keys, block) =>
+ // Collect block-wise row means and output them as one-column matrix.
+ keys -> dense(block.rowMeans).t
+ }
+ .collect(::, 0)
+ }
+
+ /** Return diagonal vector */
+ def diagv: Vector = {
+ require(drm.ncol == drm.nrow, "Must be square to extract diagonal")
+ drm.mapBlock(ncol = 1) { case (keys, block) =>
+ keys -> dense(for (r <- block.view) yield r(keys(r.index))).t
+ }
+ .collect(::, 0)
+ }
+
}
object RLikeDrmOps {
http://git-wip-us.apache.org/repos/asf/mahout/blob/25c9ed0e/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala
index 3f37bb9..76f4624 100644
--- a/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala
+++ b/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala
@@ -444,6 +444,29 @@ trait RLikeDrmOpsSuiteBase extends DistributedMahoutSuite with Matchers {
drmA.colMeans() should equal(inCoreA.colMeans())
}
+ test("rowSums, rowMeans") {
+ val inCoreA = dense(
+ (1, 2),
+ (3, 4),
+ (20, 30)
+ )
+ val drmA = drmParallelize(inCoreA, numPartitions = 2)
+
+ drmA.rowSums() should equal(inCoreA.rowSums())
+ drmA.rowMeans() should equal(inCoreA.rowMeans())
+ }
+
+ test("A.diagv") {
+ val inCoreA = dense(
+ (1, 2, 3),
+ (3, 4, 5),
+ (20, 30, 7)
+ )
+ val drmA = drmParallelize(inCoreA, numPartitions = 2)
+
+ drmA.diagv should equal(inCoreA.diagv)
+ }
+
test("numNonZeroElementsPerColumn") {
val inCoreA = dense(
(0, 2),