You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2014/08/06 21:39:13 UTC
git commit: MAHOUT-1597: A + 1.0 (fixes)
Repository: mahout
Updated Branches:
refs/heads/master e5bc885fd -> 7a50a291b
MAHOUT-1597: A + 1.0 (fixes)
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/7a50a291
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/7a50a291
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/7a50a291
Branch: refs/heads/master
Commit: 7a50a291b4598e9809f9acf609b92175ce7f953b
Parents: e5bc885
Author: Dmitriy Lyubimov <dl...@apache.org>
Authored: Wed Aug 6 12:30:51 2014 -0700
Committer: Dmitriy Lyubimov <dl...@apache.org>
Committed: Wed Aug 6 12:33:03 2014 -0700
----------------------------------------------------------------------
.../org/apache/mahout/math/drm/logical/OpAewScalar.scala | 6 +++++-
.../mahout/sparkbindings/drm/CheckpointedDrmSpark.scala | 2 +-
.../org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala | 8 ++++++++
3 files changed, 14 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/7a50a291/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAewScalar.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAewScalar.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAewScalar.scala
index 3b651f6..19a910c 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAewScalar.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAewScalar.scala
@@ -19,6 +19,7 @@ package org.apache.mahout.math.drm.logical
import scala.reflect.ClassTag
import org.apache.mahout.math.drm.DrmLike
+import scala.util.Random
/** Operator denoting expressions like 5.0 - A or A * 5.6 */
case class OpAewScalar[K: ClassTag](
@@ -27,7 +28,10 @@ case class OpAewScalar[K: ClassTag](
val op: String
) extends AbstractUnaryOp[K,K] {
- override protected[mahout] lazy val partitioningTag: Long = A.partitioningTag
+ override protected[mahout] lazy val partitioningTag: Long =
+ if (A.canHaveMissingRows)
+ Random.nextLong()
+ else A.partitioningTag
/** Expressions like `A + 1` are always supposed to fix missing rows, so this is false */
override protected[mahout] lazy val canHaveMissingRows: Boolean = false
http://git-wip-us.apache.org/repos/asf/mahout/blob/7a50a291/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/CheckpointedDrmSpark.scala
----------------------------------------------------------------------
diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/CheckpointedDrmSpark.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/CheckpointedDrmSpark.scala
index 03050bb..1c5546b 100644
--- a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/CheckpointedDrmSpark.scala
+++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/CheckpointedDrmSpark.scala
@@ -180,7 +180,7 @@ class CheckpointedDrmSpark[K: ClassTag](
val maxPlus1 = rdd.map(_._1.asInstanceOf[Int]).fold(-1)(max(_, _)) + 1L
val rowCount = rdd.count()
_canHaveMissingRows = maxPlus1 != rowCount ||
- rdd.map(_._1).sum().toLong != ((rowCount -1.0 ) * (rowCount -2.0) /2.0).toLong
+ rdd.map(_._1).sum().toLong != (rowCount * (rowCount - 1.0) / 2.0).toLong
intFixExtra = (maxPlus1 - rowCount) max 0L
maxPlus1
} else
http://git-wip-us.apache.org/repos/asf/mahout/blob/7a50a291/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
index c47f7f1..a5cb7f8 100644
--- a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala
@@ -28,6 +28,14 @@ import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
/** DRMLike tests -- just run common DRM tests in Spark. */
class DrmLikeSuite extends FunSuite with DistributedSparkSuite with DrmLikeSuiteBase {
+
+  test("drmParallelize produces drm with no missing rows") {
+ val inCoreA = dense((1, 2, 3), (3, 4, 5))
+ val drmA = drmParallelize(inCoreA, numPartitions = 2)
+
+ drmA.canHaveMissingRows shouldBe false
+ }
+
test("DRM blockify dense") {
val inCoreA = dense((1, 2, 3), (3, 4, 5))