You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2014/06/10 20:34:17 UTC

git commit: MAHOUT-1572: blockify() to detect (naively) the data sparsity in the loaded data

Repository: mahout
Updated Branches:
  refs/heads/master 907781bb8 -> 8c529ccff


MAHOUT-1572: blockify() to detect (naively) the data sparsity in the loaded data

This closes apache/mahout#10

Squashed commit of the following:

commit cc93dedcad0b6ff0365f8e15dba280221a0a64f0
Author: Dmitriy Lyubimov <dl...@apache.org>
Date:   Tue Jun 10 11:27:38 2014 -0700

    + tests

commit 07180efbfa4472f7d13a2afe1f8f37e18edfe08e
Merge: 162c5ca 907781b
Author: Dmitriy Lyubimov <dl...@apache.org>
Date:   Tue Jun 10 11:04:51 2014 -0700

    Merge branch 'master' into MAHOUT-1572

commit 162c5ca36e00af91a9599075332c577d9b1a13c4
Author: Dmitriy Lyubimov <dl...@apache.org>
Date:   Wed Jun 4 15:10:11 2014 -0700

    initial fix (?)


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/8c529ccf
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/8c529ccf
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/8c529ccf

Branch: refs/heads/master
Commit: 8c529ccff23d419c4cb5191b0435de40d6a9831c
Parents: 907781b
Author: Dmitriy Lyubimov <dl...@apache.org>
Authored: Tue Jun 10 11:30:55 2014 -0700
Committer: Dmitriy Lyubimov <dl...@apache.org>
Committed: Tue Jun 10 11:30:55 2014 -0700

----------------------------------------------------------------------
 CHANGELOG                                       |  2 ++
 .../mahout/sparkbindings/drm/package.scala      | 12 +++++++-
 .../mahout/sparkbindings/drm/DrmLikeSuite.scala | 30 ++++++++++++++++++++
 3 files changed, 43 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/8c529ccf/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 2e174c5..2f604e1 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 1.0 - unreleased
 
+  MAHOUT-1572: blockify() to detect (naively) the data sparsity in the loaded data (dlyubimov)
+
   MAHOUT-1571: Functional Views are not serialized as dense/sparse correctly (dlyubimov)
 
   MAHOUT-1566: (Experimental) Regular ALS factorizer with conversion tests, optimizer enhancements and bug fixes (dlyubimov)

http://git-wip-us.apache.org/repos/asf/mahout/blob/8c529ccf/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala
index 37a9ac2..2a2a4a9 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala
@@ -65,7 +65,17 @@ package object drm {
         val keys = data.map(t => t._1).toArray[K]
         val vectors = data.map(t => t._2).toArray
 
-        val block = new SparseRowMatrix(vectors.size, blockncol, vectors)
+        val block = if (vectors(0).isDense) {
+          val block = new DenseMatrix(vectors.size, blockncol)
+          var row = 0
+          while (row < vectors.size) {
+            block(row, ::) := vectors(row)
+            row += 1
+          }
+          block
+        } else {
+          new SparseRowMatrix(vectors.size, blockncol, vectors)
+        }
 
         Iterator(keys -> block)
       }

http://git-wip-us.apache.org/repos/asf/mahout/blob/8c529ccf/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
index caccb70..3c7e7f9 100644
--- a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
@@ -22,6 +22,7 @@ import org.apache.mahout.math._
 import scalabindings._
 import drm._
 import RLikeOps._
+import RLikeDrmOps._
 import org.apache.mahout.sparkbindings.test.MahoutLocalContext
 
 
@@ -52,6 +53,35 @@ class DrmLikeSuite extends FunSuite with MahoutLocalContext {
     println(inCoreB)
 
   }
+  
+  test("DRM blockify dense") {
+
+    val inCoreA = dense((1, 2, 3), (3, 4, 5))
+    val drmA = drmParallelize(inCoreA, numPartitions = 2)
+
+    (inCoreA - drmA.mapBlock() {
+      case (keys, block) =>
+        if (!block.isInstanceOf[DenseMatrix])
+          throw new AssertionError("Block must be dense.")
+        keys -> block
+    }).norm should be < 1e-4
+  }
+
+  test("DRM blockify sparse -> SRM") {
+
+    val inCoreA = sparse(
+      (1, 2, 3),
+      0 -> 3 :: 2 -> 5 :: Nil
+    )
+    val drmA = drmParallelize(inCoreA, numPartitions = 2)
+
+    (inCoreA - drmA.mapBlock() {
+      case (keys, block) =>
+        if (!block.isInstanceOf[SparseRowMatrix])
+          throw new AssertionError("Block must be a SparseRowMatrix.")
+        keys -> block
+    }).norm should be < 1e-4
+  }
 
   test("DRM parallelizeEmpty") {