You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2014/08/15 21:09:39 UTC

git commit: MAHOUT-1606 - Add rowSums, rowMeans and diagonal extraction operations to distributed matrices; this closes apache/mahout#43

Repository: mahout
Updated Branches:
  refs/heads/master 4bac5c295 -> 25c9ed0eb


MAHOUT-1606 - Add rowSums, rowMeans and diagonal extraction operations to distributed matrices
this closes apache/mahout#43

Squashed commit of the following:

commit cd819a5df79e43d558d450ef75c76ae47fcd525b
Merge: fd54249 4bac5c2
Author: Dmitriy Lyubimov <dl...@apache.org>
Date:   Fri Aug 15 12:07:21 2014 -0700

    Merge branch 'master' into rowmeans

commit fd54249c20a5c89c96060b6d96e49ada579d8144
Author: Dmitriy Lyubimov <dl...@apache.org>
Date:   Tue Aug 12 16:15:32 2014 -0700

    rowSums, rowMeans and diagonal extraction for distributed matrices


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/25c9ed0e
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/25c9ed0e
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/25c9ed0e

Branch: refs/heads/master
Commit: 25c9ed0ebeb34a3ef43a48fa47ea0340a049aac0
Parents: 4bac5c2
Author: Dmitriy Lyubimov <dl...@apache.org>
Authored: Fri Aug 15 12:08:33 2014 -0700
Committer: Dmitriy Lyubimov <dl...@apache.org>
Committed: Fri Aug 15 12:08:33 2014 -0700

----------------------------------------------------------------------
 CHANGELOG                                       |  2 ++
 .../apache/mahout/math/drm/RLikeDrmOps.scala    | 33 ++++++++++++++++++++
 .../mahout/math/drm/RLikeDrmOpsSuiteBase.scala  | 23 ++++++++++++++
 3 files changed, 58 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/25c9ed0e/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index aefb838..a0d8507 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 1.0 - unreleased
 
+  MAHOUT-1606 - Add rowSums, rowMeans and diagonal extraction operations to distributed matrices (dlyubimov)
+
   MAHOUT-1603: Tweaks for Spark 1.0.x (dlyubimov & pferrel)
 
   MAHOUT-1596: implement rbind() operator (Anand Avati and dlyubimov)

http://git-wip-us.apache.org/repos/asf/mahout/blob/25c9ed0e/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
index ae5da71..15a0975 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
@@ -68,12 +68,45 @@ class RLikeDrmOps[K: ClassTag](drm: DrmLike[K]) extends DrmLikeOps[K](drm) {
 
 class RLikeDrmIntOps(drm: DrmLike[Int]) extends RLikeDrmOps[Int](drm) {
 
+  import org.apache.mahout.math._
+  import scalabindings._
+  import RLikeOps._
+  import RLikeDrmOps._
+  import scala.collection.JavaConversions._
+
   override def t: DrmLike[Int] = OpAt(A = drm)
 
   def %*%:[K: ClassTag](that: DrmLike[K]): DrmLike[K] = OpAB[K](A = that, B = this.drm)
 
   def %*%:(that: Matrix): DrmLike[Int] = OpTimesLeftMatrix(left = that, A = this.drm)
 
+  /** Row sums. This is of course applicable to Int-keyed distributed matrices only. */
+  def rowSums(): Vector = {
+    drm.mapBlock(ncol = 1) { case (keys, block) =>
+      // Collect block-wise rowsums and output them as one-column matrix.
+      keys -> dense(block.rowSums).t
+    }
+        .collect(::, 0)
+  }
+
+  /** Row means */
+  def rowMeans(): Vector = {
+    drm.mapBlock(ncol = 1) { case (keys, block) =>
+      // Collect block-wise row means and output them as one-column matrix.
+      keys -> dense(block.rowMeans).t
+    }
+        .collect(::, 0)
+  }
+
+  /** Return diagonal vector */
+  def diagv: Vector = {
+    require(drm.ncol == drm.nrow, "Must be square to extract diagonal")
+    drm.mapBlock(ncol = 1) { case (keys, block) =>
+      keys -> dense(for (r <- block.view) yield r(keys(r.index))).t
+    }
+        .collect(::, 0)
+  }
+
 }
 
 object RLikeDrmOps {

http://git-wip-us.apache.org/repos/asf/mahout/blob/25c9ed0e/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala
index 3f37bb9..76f4624 100644
--- a/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala
+++ b/math-scala/src/test/scala/org/apache/mahout/math/drm/RLikeDrmOpsSuiteBase.scala
@@ -444,6 +444,29 @@ trait RLikeDrmOpsSuiteBase extends DistributedMahoutSuite with Matchers {
     drmA.colMeans() should equal(inCoreA.colMeans())
   }
 
+  test("rowSums, rowMeans") {
+    val inCoreA = dense(
+      (1, 2),
+      (3, 4),
+      (20, 30)
+    )
+    val drmA = drmParallelize(inCoreA, numPartitions = 2)
+
+    drmA.rowSums() should equal(inCoreA.rowSums())
+    drmA.rowMeans() should equal(inCoreA.rowMeans())
+  }
+
+  test("A.diagv") {
+    val inCoreA = dense(
+      (1, 2, 3),
+      (3, 4, 5),
+      (20, 30, 7)
+    )
+    val drmA = drmParallelize(inCoreA, numPartitions = 2)
+
+    drmA.diagv should equal(inCoreA.diagv)
+  }
+
   test("numNonZeroElementsPerColumn") {
     val inCoreA = dense(
       (0, 2),