You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2014/06/10 20:34:17 UTC
git commit: MAHOUT-1572: blockify() to detect (naively) the data
sparsity in the loaded data
Repository: mahout
Updated Branches:
refs/heads/master 907781bb8 -> 8c529ccff
MAHOUT-1572: blockify() to detect (naively) the data sparsity in the loaded data
This closes apache/mahout#10
Squashed commit of the following:
commit cc93dedcad0b6ff0365f8e15dba280221a0a64f0
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Tue Jun 10 11:27:38 2014 -0700
+ tests
commit 07180efbfa4472f7d13a2afe1f8f37e18edfe08e
Merge: 162c5ca 907781b
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Tue Jun 10 11:04:51 2014 -0700
Merge branch 'master' into MAHOUT-1572
commit 162c5ca36e00af91a9599075332c577d9b1a13c4
Author: Dmitriy Lyubimov <dl...@apache.org>
Date: Wed Jun 4 15:10:11 2014 -0700
initial fix (?)
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/8c529ccf
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/8c529ccf
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/8c529ccf
Branch: refs/heads/master
Commit: 8c529ccff23d419c4cb5191b0435de40d6a9831c
Parents: 907781b
Author: Dmitriy Lyubimov <dl...@apache.org>
Authored: Tue Jun 10 11:30:55 2014 -0700
Committer: Dmitriy Lyubimov <dl...@apache.org>
Committed: Tue Jun 10 11:30:55 2014 -0700
----------------------------------------------------------------------
CHANGELOG | 2 ++
.../mahout/sparkbindings/drm/package.scala | 12 +++++++-
.../mahout/sparkbindings/drm/DrmLikeSuite.scala | 30 ++++++++++++++++++++
3 files changed, 43 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/8c529ccf/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 2e174c5..2f604e1 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
Release 1.0 - unreleased
+ MAHOUT-1572: blockify() to detect (naively) the data sparsity in the loaded data (dlyubimov)
+
MAHOUT-1571: Functional Views are not serialized as dense/sparse correctly (dlyubimov)
MAHOUT-1566: (Experimental) Regular ALS factorizer with conversion tests, optimizer enhancements and bug fixes (dlyubimov)
http://git-wip-us.apache.org/repos/asf/mahout/blob/8c529ccf/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala
index 37a9ac2..2a2a4a9 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala
@@ -65,7 +65,17 @@ package object drm {
val keys = data.map(t => t._1).toArray[K]
val vectors = data.map(t => t._2).toArray
- val block = new SparseRowMatrix(vectors.size, blockncol, vectors)
+ val block = if (vectors(0).isDense) {
+ val block = new DenseMatrix(vectors.size, blockncol)
+ var row = 0
+ while (row < vectors.size) {
+ block(row, ::) := vectors(row)
+ row += 1
+ }
+ block
+ } else {
+ new SparseRowMatrix(vectors.size, blockncol, vectors)
+ }
Iterator(keys -> block)
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/8c529ccf/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
index caccb70..3c7e7f9 100644
--- a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
@@ -22,6 +22,7 @@ import org.apache.mahout.math._
import scalabindings._
import drm._
import RLikeOps._
+import RLikeDrmOps._
import org.apache.mahout.sparkbindings.test.MahoutLocalContext
@@ -52,6 +53,35 @@ class DrmLikeSuite extends FunSuite with MahoutLocalContext {
println(inCoreB)
}
+
+ test("DRM blockify dense") {
+
+   // Dense in-core input, distributed over two partitions.
+   val inCoreA = dense((1, 2, 3), (3, 4, 5))
+   val drmA = drmParallelize(inCoreA, numPartitions = 2)
+
+   // Identity block map that additionally insists every block it sees is a DenseMatrix.
+   val drmChecked = drmA.mapBlock() {
+     case (keys, block) =>
+       block match {
+         case _: DenseMatrix => keys -> block
+         case _ => throw new AssertionError("Block must be dense.")
+       }
+   }
+
+   // Blocks are passed through unchanged, so the difference to the input must be (near) zero.
+   (inCoreA - drmChecked).norm should be < 1e-4
+ }
+
+ test("DRM blockify sparse -> SRM") {
+
+   // In-core matrix built with sparse(...), distributed over two partitions.
+   val inCoreA = sparse(
+     (1, 2, 3),
+     0 -> 3 :: 2 -> 5 :: Nil
+   )
+   val drmA = drmParallelize(inCoreA, numPartitions = 2)
+
+   // Identity block map that fails unless each block is a SparseRowMatrix. Blocks are passed
+   // through unchanged, so the difference to the input must be (near) zero.
+   (inCoreA - drmA.mapBlock() {
+     case (keys, block) =>
+       // The failure message names the actual expectation of this test (sparse row matrix),
+       // not the "dense" wording used by the dense blockify test.
+       if (!block.isInstanceOf[SparseRowMatrix])
+         throw new AssertionError("Block must be a SparseRowMatrix.")
+       keys -> block
+   }).norm should be < 1e-4
+ }
test("DRM parallelizeEmpty") {